/* lc3-google/src/mdct.c */

/******************************************************************************
 *
 *  Copyright 2022 Google LLC
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at:
 *
 *  http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 *
 ******************************************************************************/

#include <math.h>

#include "tables.h"

#include "mdct_neon.h"


/* ----------------------------------------------------------------------------
 *  FFT processing
 * -------------------------------------------------------------------------- */

9a19cd78SMatthias Ringwald/**
*4930cef6SMatthias Ringwald * FFT 5 Points
9a19cd78SMatthias Ringwald * x, y            Input and output coefficients, of size 5xn
*4930cef6SMatthias Ringwald * n               Number of interleaved transform to perform (n % 2 = 0)
9a19cd78SMatthias Ringwald */
*4930cef6SMatthias Ringwald#ifndef fft_5
*4930cef6SMatthias RingwaldLC3_HOT static inline void fft_5(
9a19cd78SMatthias Ringwald    const struct lc3_complex *x, struct lc3_complex *y, int n)
9a19cd78SMatthias Ringwald{
9a19cd78SMatthias Ringwald    static const float cos1 =  0.3090169944;  /* cos(-2Pi 1/5) */
9a19cd78SMatthias Ringwald    static const float cos2 = -0.8090169944;  /* cos(-2Pi 2/5) */
9a19cd78SMatthias Ringwald
9a19cd78SMatthias Ringwald    static const float sin1 = -0.9510565163;  /* sin(-2Pi 1/5) */
9a19cd78SMatthias Ringwald    static const float sin2 = -0.5877852523;  /* sin(-2Pi 2/5) */
9a19cd78SMatthias Ringwald
9a19cd78SMatthias Ringwald    for (int i = 0; i < n; i++, x++, y+= 5) {
9a19cd78SMatthias Ringwald
9a19cd78SMatthias Ringwald        struct lc3_complex s14 =
9a19cd78SMatthias Ringwald            { x[1*n].re + x[4*n].re, x[1*n].im + x[4*n].im };
9a19cd78SMatthias Ringwald        struct lc3_complex d14 =
9a19cd78SMatthias Ringwald            { x[1*n].re - x[4*n].re, x[1*n].im - x[4*n].im };
9a19cd78SMatthias Ringwald
9a19cd78SMatthias Ringwald        struct lc3_complex s23 =
9a19cd78SMatthias Ringwald            { x[2*n].re + x[3*n].re, x[2*n].im + x[3*n].im };
9a19cd78SMatthias Ringwald        struct lc3_complex d23 =
9a19cd78SMatthias Ringwald            { x[2*n].re - x[3*n].re, x[2*n].im - x[3*n].im };
9a19cd78SMatthias Ringwald
9a19cd78SMatthias Ringwald        y[0].re = x[0].re + s14.re + s23.re;
*4930cef6SMatthias Ringwald
9a19cd78SMatthias Ringwald        y[0].im = x[0].im + s14.im + s23.im;
9a19cd78SMatthias Ringwald
*4930cef6SMatthias Ringwald        y[1].re = x[0].re + s14.re * cos1 - d14.im * sin1
*4930cef6SMatthias Ringwald                          + s23.re * cos2 - d23.im * sin2;
9a19cd78SMatthias Ringwald
*4930cef6SMatthias Ringwald        y[1].im = x[0].im + s14.im * cos1 + d14.re * sin1
*4930cef6SMatthias Ringwald                          + s23.im * cos2 + d23.re * sin2;
9a19cd78SMatthias Ringwald
*4930cef6SMatthias Ringwald        y[2].re = x[0].re + s14.re * cos2 - d14.im * sin2
*4930cef6SMatthias Ringwald                          + s23.re * cos1 + d23.im * sin1;
9a19cd78SMatthias Ringwald
*4930cef6SMatthias Ringwald        y[2].im = x[0].im + s14.im * cos2 + d14.re * sin2
*4930cef6SMatthias Ringwald                          + s23.im * cos1 - d23.re * sin1;
9a19cd78SMatthias Ringwald
*4930cef6SMatthias Ringwald        y[3].re = x[0].re + s14.re * cos2 + d14.im * sin2
*4930cef6SMatthias Ringwald                          + s23.re * cos1 - d23.im * sin1;
9a19cd78SMatthias Ringwald
*4930cef6SMatthias Ringwald        y[3].im = x[0].im + s14.im * cos2 - d14.re * sin2
*4930cef6SMatthias Ringwald                          + s23.im * cos1 + d23.re * sin1;
9a19cd78SMatthias Ringwald
*4930cef6SMatthias Ringwald        y[4].re = x[0].re + s14.re * cos1 + d14.im * sin1
*4930cef6SMatthias Ringwald                          + s23.re * cos2 + d23.im * sin2;
9a19cd78SMatthias Ringwald
*4930cef6SMatthias Ringwald        y[4].im = x[0].im + s14.im * cos1 - d14.re * sin1
*4930cef6SMatthias Ringwald                          + s23.im * cos2 - d23.re * sin2;
9a19cd78SMatthias Ringwald    }
9a19cd78SMatthias Ringwald}
*4930cef6SMatthias Ringwald#endif /* fft_5 */
9a19cd78SMatthias Ringwald
9a19cd78SMatthias Ringwald/**
*4930cef6SMatthias Ringwald * FFT Butterfly 3 Points
9a19cd78SMatthias Ringwald * x, y            Input and output coefficients
9a19cd78SMatthias Ringwald * twiddles        Twiddles factors, determine size of transform
9a19cd78SMatthias Ringwald * n               Number of interleaved transforms
9a19cd78SMatthias Ringwald */
*4930cef6SMatthias Ringwald#ifndef fft_bf3
*4930cef6SMatthias RingwaldLC3_HOT static inline void fft_bf3(
*4930cef6SMatthias Ringwald    const struct lc3_fft_bf3_twiddles *twiddles,
9a19cd78SMatthias Ringwald    const struct lc3_complex *x, struct lc3_complex *y, int n)
9a19cd78SMatthias Ringwald{
9a19cd78SMatthias Ringwald    int n3 = twiddles->n3;
9a19cd78SMatthias Ringwald    const struct lc3_complex (*w0)[2] = twiddles->t;
9a19cd78SMatthias Ringwald    const struct lc3_complex (*w1)[2] = w0 + n3, (*w2)[2] = w1 + n3;
9a19cd78SMatthias Ringwald
9a19cd78SMatthias Ringwald    const struct lc3_complex *x0 = x, *x1 = x0 + n*n3, *x2 = x1 + n*n3;
9a19cd78SMatthias Ringwald    struct lc3_complex *y0 = y, *y1 = y0 + n3, *y2 = y1 + n3;
9a19cd78SMatthias Ringwald
*4930cef6SMatthias Ringwald    for (int i = 0; i < n; i++, y0 += 3*n3, y1 += 3*n3, y2 += 3*n3)
9a19cd78SMatthias Ringwald        for (int j = 0; j < n3; j++, x0++, x1++, x2++) {
9a19cd78SMatthias Ringwald
*4930cef6SMatthias Ringwald            y0[j].re = x0->re + x1->re * w0[j][0].re - x1->im * w0[j][0].im
*4930cef6SMatthias Ringwald                              + x2->re * w0[j][1].re - x2->im * w0[j][1].im;
9a19cd78SMatthias Ringwald
*4930cef6SMatthias Ringwald            y0[j].im = x0->im + x1->im * w0[j][0].re + x1->re * w0[j][0].im
*4930cef6SMatthias Ringwald                              + x2->im * w0[j][1].re + x2->re * w0[j][1].im;
9a19cd78SMatthias Ringwald
*4930cef6SMatthias Ringwald            y1[j].re = x0->re + x1->re * w1[j][0].re - x1->im * w1[j][0].im
*4930cef6SMatthias Ringwald                              + x2->re * w1[j][1].re - x2->im * w1[j][1].im;
9a19cd78SMatthias Ringwald
*4930cef6SMatthias Ringwald            y1[j].im = x0->im + x1->im * w1[j][0].re + x1->re * w1[j][0].im
*4930cef6SMatthias Ringwald                              + x2->im * w1[j][1].re + x2->re * w1[j][1].im;
9a19cd78SMatthias Ringwald
*4930cef6SMatthias Ringwald            y2[j].re = x0->re + x1->re * w2[j][0].re - x1->im * w2[j][0].im
*4930cef6SMatthias Ringwald                              + x2->re * w2[j][1].re - x2->im * w2[j][1].im;
9a19cd78SMatthias Ringwald
*4930cef6SMatthias Ringwald            y2[j].im = x0->im + x1->im * w2[j][0].re + x1->re * w2[j][0].im
*4930cef6SMatthias Ringwald                              + x2->im * w2[j][1].re + x2->re * w2[j][1].im;
9a19cd78SMatthias Ringwald        }
9a19cd78SMatthias Ringwald}
*4930cef6SMatthias Ringwald#endif /* fft_bf3 */
9a19cd78SMatthias Ringwald
9a19cd78SMatthias Ringwald/**
*4930cef6SMatthias Ringwald * FFT Butterfly 2 Points
9a19cd78SMatthias Ringwald * twiddles        Twiddles factors, determine size of transform
9a19cd78SMatthias Ringwald * x, y            Input and output coefficients
9a19cd78SMatthias Ringwald * n               Number of interleaved transforms
9a19cd78SMatthias Ringwald */
*4930cef6SMatthias Ringwald#ifndef fft_bf2
*4930cef6SMatthias RingwaldLC3_HOT static inline void fft_bf2(
*4930cef6SMatthias Ringwald    const struct lc3_fft_bf2_twiddles *twiddles,
9a19cd78SMatthias Ringwald    const struct lc3_complex *x, struct lc3_complex *y, int n)
9a19cd78SMatthias Ringwald{
9a19cd78SMatthias Ringwald    int n2 = twiddles->n2;
9a19cd78SMatthias Ringwald    const struct lc3_complex *w = twiddles->t;
9a19cd78SMatthias Ringwald
9a19cd78SMatthias Ringwald    const struct lc3_complex *x0 = x, *x1 = x0 + n*n2;
9a19cd78SMatthias Ringwald    struct lc3_complex *y0 = y, *y1 = y0 + n2;
9a19cd78SMatthias Ringwald
9a19cd78SMatthias Ringwald    for (int i = 0; i < n; i++, y0 += 2*n2, y1 += 2*n2) {
9a19cd78SMatthias Ringwald
9a19cd78SMatthias Ringwald        for (int j = 0; j < n2; j++, x0++, x1++) {
9a19cd78SMatthias Ringwald
*4930cef6SMatthias Ringwald            y0[j].re = x0->re + x1->re * w[j].re - x1->im * w[j].im;
*4930cef6SMatthias Ringwald            y0[j].im = x0->im + x1->im * w[j].re + x1->re * w[j].im;
9a19cd78SMatthias Ringwald
*4930cef6SMatthias Ringwald            y1[j].re = x0->re - x1->re * w[j].re + x1->im * w[j].im;
*4930cef6SMatthias Ringwald            y1[j].im = x0->im - x1->im * w[j].re - x1->re * w[j].im;
9a19cd78SMatthias Ringwald        }
9a19cd78SMatthias Ringwald    }
9a19cd78SMatthias Ringwald}
*4930cef6SMatthias Ringwald#endif /* fft_bf2 */
9a19cd78SMatthias Ringwald
9a19cd78SMatthias Ringwald/**
9a19cd78SMatthias Ringwald * Perform FFT
9a19cd78SMatthias Ringwald * x, y0, y1       Input, and 2 scratch buffers of size `n`
9a19cd78SMatthias Ringwald * n               Number of points 30, 40, 60, 80, 90, 120, 160, 180, 240
9a19cd78SMatthias Ringwald * return          The buffer `y0` or `y1` that hold the result
9a19cd78SMatthias Ringwald *
9a19cd78SMatthias Ringwald * Input `x` can be the same as the `y0` second scratch buffer
9a19cd78SMatthias Ringwald */
*4930cef6SMatthias Ringwaldstatic struct lc3_complex *fft(const struct lc3_complex *x, int n,
9a19cd78SMatthias Ringwald    struct lc3_complex *y0, struct lc3_complex *y1)
9a19cd78SMatthias Ringwald{
9a19cd78SMatthias Ringwald    struct lc3_complex *y[2] = { y1, y0 };
9a19cd78SMatthias Ringwald    int i2, i3, is = 0;
9a19cd78SMatthias Ringwald
9a19cd78SMatthias Ringwald    /* The number of points `n` can be decomposed as :
9a19cd78SMatthias Ringwald     *
9a19cd78SMatthias Ringwald     *   n = 5^1 * 3^n3 * 2^n2
9a19cd78SMatthias Ringwald     *
9a19cd78SMatthias Ringwald     *   for n = 40, 80, 160        n3 = 0, n2 = [3..5]
9a19cd78SMatthias Ringwald     *       n = 30, 60, 120, 240   n3 = 1, n2 = [1..4]
9a19cd78SMatthias Ringwald     *       n = 90, 180            n3 = 2, n2 = [1..2]
9a19cd78SMatthias Ringwald     *
9a19cd78SMatthias Ringwald     * Note that the expression `n & (n-1) == 0` is equivalent
9a19cd78SMatthias Ringwald     * to the check that `n` is a power of 2. */
9a19cd78SMatthias Ringwald
*4930cef6SMatthias Ringwald    fft_5(x, y[is], n /= 5);
9a19cd78SMatthias Ringwald
9a19cd78SMatthias Ringwald    for (i3 = 0; n & (n-1); i3++, is ^= 1)
*4930cef6SMatthias Ringwald        fft_bf3(lc3_fft_twiddles_bf3[i3], y[is], y[is ^ 1], n /= 3);
9a19cd78SMatthias Ringwald
9a19cd78SMatthias Ringwald    for (i2 = 0; n > 1; i2++, is ^= 1)
*4930cef6SMatthias Ringwald        fft_bf2(lc3_fft_twiddles_bf2[i2][i3], y[is], y[is ^ 1], n >>= 1);
9a19cd78SMatthias Ringwald
9a19cd78SMatthias Ringwald    return y[is];
9a19cd78SMatthias Ringwald}
9a19cd78SMatthias Ringwald
9a19cd78SMatthias Ringwald
/* ----------------------------------------------------------------------------
 *  MDCT processing
 * -------------------------------------------------------------------------- */

9a19cd78SMatthias Ringwald/**
9a19cd78SMatthias Ringwald * Windowing of samples before MDCT
9a19cd78SMatthias Ringwald * dt, sr          Duration and samplerate (size of the transform)
*4930cef6SMatthias Ringwald * x, y            Input current and delayed samples
*4930cef6SMatthias Ringwald * y, d            Output windowed samples, and delayed ones
9a19cd78SMatthias Ringwald */
*4930cef6SMatthias RingwaldLC3_HOT static void mdct_window(enum lc3_dt dt, enum lc3_srate sr,
*4930cef6SMatthias Ringwald    const float *x, float *d, float *y)
9a19cd78SMatthias Ringwald{
9a19cd78SMatthias Ringwald    int ns = LC3_NS(dt, sr), nd = LC3_ND(dt, sr);
9a19cd78SMatthias Ringwald
9a19cd78SMatthias Ringwald    const float *w0 = lc3_mdct_win[dt][sr], *w1 = w0 + ns;
9a19cd78SMatthias Ringwald    const float *w2 = w1, *w3 = w2 + nd;
9a19cd78SMatthias Ringwald
*4930cef6SMatthias Ringwald    const float *x0 = x + ns-nd, *x1 = x0;
9a19cd78SMatthias Ringwald    float *y0 = y + ns/2, *y1 = y0;
*4930cef6SMatthias Ringwald    float *d0 = d, *d1 = d + nd;
9a19cd78SMatthias Ringwald
*4930cef6SMatthias Ringwald    while (x1 > x) {
*4930cef6SMatthias Ringwald        *(--y0) = *d0 * *(w0++) - *(--x1) * *(--w1);
*4930cef6SMatthias Ringwald        *(y1++) = (*(d0++) = *(x0++)) * *(w2++);
9a19cd78SMatthias Ringwald
*4930cef6SMatthias Ringwald        *(--y0) = *d0 * *(w0++) - *(--x1) * *(--w1);
*4930cef6SMatthias Ringwald        *(y1++) = (*(d0++) = *(x0++)) * *(w2++);
*4930cef6SMatthias Ringwald    }
9a19cd78SMatthias Ringwald
*4930cef6SMatthias Ringwald    for (x1 += ns; x0 < x1; ) {
*4930cef6SMatthias Ringwald        *(--y0) = *d0 * *(w0++) - *(--d1) * *(--w1);
*4930cef6SMatthias Ringwald        *(y1++) = (*(d0++) = *(x0++)) * *(w2++) + (*d1 = *(--x1)) * *(--w3);
*4930cef6SMatthias Ringwald
*4930cef6SMatthias Ringwald        *(--y0) = *d0 * *(w0++) - *(--d1) * *(--w1);
*4930cef6SMatthias Ringwald        *(y1++) = (*(d0++) = *(x0++)) * *(w2++) + (*d1 = *(--x1)) * *(--w3);
*4930cef6SMatthias Ringwald    }
9a19cd78SMatthias Ringwald}
9a19cd78SMatthias Ringwald
9a19cd78SMatthias Ringwald/**
9a19cd78SMatthias Ringwald * Pre-rotate MDCT coefficients of N/2 points, before FFT N/4 points FFT
9a19cd78SMatthias Ringwald * def             Size and twiddles factors
9a19cd78SMatthias Ringwald * x, y            Input and output coefficients
9a19cd78SMatthias Ringwald *
9a19cd78SMatthias Ringwald * `x` and y` can be the same buffer
9a19cd78SMatthias Ringwald */
*4930cef6SMatthias RingwaldLC3_HOT static void mdct_pre_fft(const struct lc3_mdct_rot_def *def,
9a19cd78SMatthias Ringwald    const float *x, struct lc3_complex *y)
9a19cd78SMatthias Ringwald{
9a19cd78SMatthias Ringwald    int n4 = def->n4;
9a19cd78SMatthias Ringwald
9a19cd78SMatthias Ringwald    const float *x0 = x, *x1 = x0 + 2*n4;
9a19cd78SMatthias Ringwald    const struct lc3_complex *w0 = def->w, *w1 = w0 + n4;
9a19cd78SMatthias Ringwald    struct lc3_complex *y0 = y, *y1 = y0 + n4;
9a19cd78SMatthias Ringwald
9a19cd78SMatthias Ringwald    while (x0 < x1) {
9a19cd78SMatthias Ringwald        struct lc3_complex u, uw = *(w0++);
9a19cd78SMatthias Ringwald        u.re = - *(--x1) * uw.re + *x0 * uw.im;
9a19cd78SMatthias Ringwald        u.im =   *(x0++) * uw.re + *x1 * uw.im;
9a19cd78SMatthias Ringwald
9a19cd78SMatthias Ringwald        struct lc3_complex v, vw = *(--w1);
9a19cd78SMatthias Ringwald        v.re = - *(--x1) * vw.im + *x0 * vw.re;
9a19cd78SMatthias Ringwald        v.im = - *(x0++) * vw.im - *x1 * vw.re;
9a19cd78SMatthias Ringwald
9a19cd78SMatthias Ringwald        *(y0++) = u;
9a19cd78SMatthias Ringwald        *(--y1) = v;
9a19cd78SMatthias Ringwald    }
9a19cd78SMatthias Ringwald}
9a19cd78SMatthias Ringwald
9a19cd78SMatthias Ringwald/**
9a19cd78SMatthias Ringwald * Post-rotate FFT N/4 points coefficients, resulting MDCT N points
9a19cd78SMatthias Ringwald * def             Size and twiddles factors
9a19cd78SMatthias Ringwald * x, y            Input and output coefficients
9a19cd78SMatthias Ringwald * scale           Scale on output coefficients
9a19cd78SMatthias Ringwald *
9a19cd78SMatthias Ringwald * `x` and y` can be the same buffer
9a19cd78SMatthias Ringwald */
*4930cef6SMatthias RingwaldLC3_HOT static void mdct_post_fft(const struct lc3_mdct_rot_def *def,
9a19cd78SMatthias Ringwald    const struct lc3_complex *x, float *y, float scale)
9a19cd78SMatthias Ringwald{
9a19cd78SMatthias Ringwald    int n4 = def->n4, n8 = n4 >> 1;
9a19cd78SMatthias Ringwald
9a19cd78SMatthias Ringwald    const struct lc3_complex *w0 = def->w + n8, *w1 = w0 - 1;
9a19cd78SMatthias Ringwald    const struct lc3_complex *x0 = x + n8, *x1 = x0 - 1;
9a19cd78SMatthias Ringwald
9a19cd78SMatthias Ringwald    float *y0 = y + n4, *y1 = y0;
9a19cd78SMatthias Ringwald
9a19cd78SMatthias Ringwald    for ( ; y1 > y; x0++, x1--, w0++, w1--) {
9a19cd78SMatthias Ringwald
9a19cd78SMatthias Ringwald        float u0 = (x0->im * w0->im + x0->re * w0->re) * scale;
9a19cd78SMatthias Ringwald        float u1 = (x1->re * w1->im - x1->im * w1->re) * scale;
9a19cd78SMatthias Ringwald
9a19cd78SMatthias Ringwald        float v0 = (x0->re * w0->im - x0->im * w0->re) * scale;
9a19cd78SMatthias Ringwald        float v1 = (x1->im * w1->im + x1->re * w1->re) * scale;
9a19cd78SMatthias Ringwald
9a19cd78SMatthias Ringwald        *(y0++) = u0;  *(y0++) = u1;
9a19cd78SMatthias Ringwald        *(--y1) = v0;  *(--y1) = v1;
9a19cd78SMatthias Ringwald    }
9a19cd78SMatthias Ringwald}
9a19cd78SMatthias Ringwald
9a19cd78SMatthias Ringwald/**
9a19cd78SMatthias Ringwald * Pre-rotate IMDCT coefficients of N points, before FFT N/4 points FFT
9a19cd78SMatthias Ringwald * def             Size and twiddles factors
9a19cd78SMatthias Ringwald * x, y            Input and output coefficients
9a19cd78SMatthias Ringwald *
*4930cef6SMatthias Ringwald * `x` and `y` can be the same buffer
*4930cef6SMatthias Ringwald * The real and imaginary parts of `y` are swapped,
*4930cef6SMatthias Ringwald * to operate on FFT instead of IFFT
9a19cd78SMatthias Ringwald */
*4930cef6SMatthias RingwaldLC3_HOT static void imdct_pre_fft(const struct lc3_mdct_rot_def *def,
9a19cd78SMatthias Ringwald    const float *x, struct lc3_complex *y)
9a19cd78SMatthias Ringwald{
9a19cd78SMatthias Ringwald    int n4 = def->n4;
9a19cd78SMatthias Ringwald
9a19cd78SMatthias Ringwald    const float *x0 = x, *x1 = x0 + 2*n4;
9a19cd78SMatthias Ringwald
9a19cd78SMatthias Ringwald    const struct lc3_complex *w0 = def->w, *w1 = w0 + n4;
9a19cd78SMatthias Ringwald    struct lc3_complex *y0 = y, *y1 = y0 + n4;
9a19cd78SMatthias Ringwald
9a19cd78SMatthias Ringwald    while (x0 < x1) {
9a19cd78SMatthias Ringwald        float u0 = *(x0++), u1 = *(--x1);
9a19cd78SMatthias Ringwald        float v0 = *(x0++), v1 = *(--x1);
9a19cd78SMatthias Ringwald        struct lc3_complex uw = *(w0++), vw = *(--w1);
9a19cd78SMatthias Ringwald
*4930cef6SMatthias Ringwald        (y0  )->re = - u0 * uw.re - u1 * uw.im;
*4930cef6SMatthias Ringwald        (y0++)->im = - u1 * uw.re + u0 * uw.im;
9a19cd78SMatthias Ringwald
*4930cef6SMatthias Ringwald        (--y1)->re = - v1 * vw.re - v0 * vw.im;
*4930cef6SMatthias Ringwald        (  y1)->im = - v0 * vw.re + v1 * vw.im;
9a19cd78SMatthias Ringwald    }
9a19cd78SMatthias Ringwald}
9a19cd78SMatthias Ringwald
9a19cd78SMatthias Ringwald/**
9a19cd78SMatthias Ringwald * Post-rotate FFT N/4 points coefficients, resulting IMDCT N points
9a19cd78SMatthias Ringwald * def             Size and twiddles factors
9a19cd78SMatthias Ringwald * x, y            Input and output coefficients
9a19cd78SMatthias Ringwald * scale           Scale on output coefficients
9a19cd78SMatthias Ringwald *
9a19cd78SMatthias Ringwald * `x` and y` can be the same buffer
*4930cef6SMatthias Ringwald * The real and imaginary parts of `x` are swapped,
*4930cef6SMatthias Ringwald * to operate on FFT instead of IFFT
9a19cd78SMatthias Ringwald */
*4930cef6SMatthias RingwaldLC3_HOT static void imdct_post_fft(const struct lc3_mdct_rot_def *def,
9a19cd78SMatthias Ringwald    const struct lc3_complex *x, float *y, float scale)
9a19cd78SMatthias Ringwald{
9a19cd78SMatthias Ringwald    int n4 = def->n4;
9a19cd78SMatthias Ringwald
9a19cd78SMatthias Ringwald    const struct lc3_complex *w0 = def->w, *w1 = w0 + n4;
9a19cd78SMatthias Ringwald    const struct lc3_complex *x0 = x, *x1 = x0 + n4;
9a19cd78SMatthias Ringwald
9a19cd78SMatthias Ringwald    float *y0 = y, *y1 = y0 + 2*n4;
9a19cd78SMatthias Ringwald
9a19cd78SMatthias Ringwald    while (x0 < x1) {
9a19cd78SMatthias Ringwald        struct lc3_complex uz = *(x0++), vz = *(--x1);
9a19cd78SMatthias Ringwald        struct lc3_complex uw = *(w0++), vw = *(--w1);
9a19cd78SMatthias Ringwald
*4930cef6SMatthias Ringwald        *(y0++) = (uz.re * uw.im - uz.im * uw.re) * scale;
*4930cef6SMatthias Ringwald        *(--y1) = (uz.re * uw.re + uz.im * uw.im) * scale;
9a19cd78SMatthias Ringwald
*4930cef6SMatthias Ringwald        *(--y1) = (vz.re * vw.im - vz.im * vw.re) * scale;
*4930cef6SMatthias Ringwald        *(y0++) = (vz.re * vw.re + vz.im * vw.im) * scale;
9a19cd78SMatthias Ringwald    }
9a19cd78SMatthias Ringwald}
9a19cd78SMatthias Ringwald
9a19cd78SMatthias Ringwald/**
9a19cd78SMatthias Ringwald * Apply windowing of samples
9a19cd78SMatthias Ringwald * dt, sr          Duration and samplerate
9a19cd78SMatthias Ringwald * x, d            Middle half of IMDCT coefficients and delayed samples
9a19cd78SMatthias Ringwald * y, d            Output samples and delayed ones
9a19cd78SMatthias Ringwald */
*4930cef6SMatthias RingwaldLC3_HOT static void imdct_window(enum lc3_dt dt, enum lc3_srate sr,
9a19cd78SMatthias Ringwald    const float *x, float *d, float *y)
9a19cd78SMatthias Ringwald{
9a19cd78SMatthias Ringwald    /* The full MDCT coefficients is given by symmetry :
9a19cd78SMatthias Ringwald     *   T[   0 ..  n/4-1] = -half[n/4-1 .. 0    ]
9a19cd78SMatthias Ringwald     *   T[ n/4 ..  n/2-1] =  half[0     .. n/4-1]
9a19cd78SMatthias Ringwald     *   T[ n/2 .. 3n/4-1] =  half[n/4   .. n/2-1]
9a19cd78SMatthias Ringwald     *   T[3n/4 ..    n-1] =  half[n/2-1 .. n/4  ]  */
9a19cd78SMatthias Ringwald
9a19cd78SMatthias Ringwald    int n4 = LC3_NS(dt, sr) >> 1, nd = LC3_ND(dt, sr);
9a19cd78SMatthias Ringwald    const float *w2 = lc3_mdct_win[dt][sr], *w0 = w2 + 3*n4, *w1 = w0;
9a19cd78SMatthias Ringwald
9a19cd78SMatthias Ringwald    const float *x0 = d + nd-n4, *x1 = x0;
9a19cd78SMatthias Ringwald    float *y0 = y + nd-n4, *y1 = y0, *y2 = d + nd, *y3 = d;
9a19cd78SMatthias Ringwald
9a19cd78SMatthias Ringwald    while (y0 > y) {
9a19cd78SMatthias Ringwald        *(--y0) = *(--x0) - *(x  ) * *(w1++);
9a19cd78SMatthias Ringwald        *(y1++) = *(x1++) + *(x++) * *(--w0);
*4930cef6SMatthias Ringwald
*4930cef6SMatthias Ringwald        *(--y0) = *(--x0) - *(x  ) * *(w1++);
*4930cef6SMatthias Ringwald        *(y1++) = *(x1++) + *(x++) * *(--w0);
9a19cd78SMatthias Ringwald    }
9a19cd78SMatthias Ringwald
9a19cd78SMatthias Ringwald    while (y1 < y + nd) {
9a19cd78SMatthias Ringwald        *(y1++) = *(x1++) + *(x++) * *(--w0);
*4930cef6SMatthias Ringwald        *(y1++) = *(x1++) + *(x++) * *(--w0);
9a19cd78SMatthias Ringwald    }
9a19cd78SMatthias Ringwald
9a19cd78SMatthias Ringwald    while (y1 < y + 2*n4) {
9a19cd78SMatthias Ringwald        *(y1++) = *(x  ) * *(--w0);
9a19cd78SMatthias Ringwald        *(--y2) = *(x++) * *(w2++);
*4930cef6SMatthias Ringwald
*4930cef6SMatthias Ringwald        *(y1++) = *(x  ) * *(--w0);
*4930cef6SMatthias Ringwald        *(--y2) = *(x++) * *(w2++);
9a19cd78SMatthias Ringwald    }
9a19cd78SMatthias Ringwald
9a19cd78SMatthias Ringwald    while (y2 > y3) {
9a19cd78SMatthias Ringwald        *(y3++) = *(x  ) * *(--w0);
9a19cd78SMatthias Ringwald        *(--y2) = *(x++) * *(w2++);
*4930cef6SMatthias Ringwald
*4930cef6SMatthias Ringwald        *(y3++) = *(x  ) * *(--w0);
*4930cef6SMatthias Ringwald        *(--y2) = *(x++) * *(w2++);
9a19cd78SMatthias Ringwald    }
9a19cd78SMatthias Ringwald}
9a19cd78SMatthias Ringwald
9a19cd78SMatthias Ringwald/**
9a19cd78SMatthias Ringwald * Forward MDCT transformation
9a19cd78SMatthias Ringwald */
9a19cd78SMatthias Ringwaldvoid lc3_mdct_forward(enum lc3_dt dt, enum lc3_srate sr,
*4930cef6SMatthias Ringwald    enum lc3_srate sr_dst, const float *x, float *d, float *y)
9a19cd78SMatthias Ringwald{
9a19cd78SMatthias Ringwald    const struct lc3_mdct_rot_def *rot = lc3_mdct_rot[dt][sr];
9a19cd78SMatthias Ringwald    int nf = LC3_NS(dt, sr_dst);
9a19cd78SMatthias Ringwald    int ns = LC3_NS(dt, sr);
9a19cd78SMatthias Ringwald
*4930cef6SMatthias Ringwald    struct lc3_complex buffer[ns/2];
*4930cef6SMatthias Ringwald    struct lc3_complex *z = (struct lc3_complex *)y;
*4930cef6SMatthias Ringwald    union { float *f; struct lc3_complex *z; } u = { .z = buffer };
9a19cd78SMatthias Ringwald
*4930cef6SMatthias Ringwald    mdct_window(dt, sr, x, d, u.f);
9a19cd78SMatthias Ringwald
9a19cd78SMatthias Ringwald    mdct_pre_fft(rot, u.f, u.z);
*4930cef6SMatthias Ringwald    u.z = fft(u.z, ns/2, u.z, z);
9a19cd78SMatthias Ringwald    mdct_post_fft(rot, u.z, y, sqrtf( (2.f*nf) / (ns*ns) ));
9a19cd78SMatthias Ringwald}
9a19cd78SMatthias Ringwald
9a19cd78SMatthias Ringwald/**
9a19cd78SMatthias Ringwald * Inverse MDCT transformation
9a19cd78SMatthias Ringwald */
9a19cd78SMatthias Ringwaldvoid lc3_mdct_inverse(enum lc3_dt dt, enum lc3_srate sr,
9a19cd78SMatthias Ringwald    enum lc3_srate sr_src, const float *x, float *d, float *y)
9a19cd78SMatthias Ringwald{
9a19cd78SMatthias Ringwald    const struct lc3_mdct_rot_def *rot = lc3_mdct_rot[dt][sr];
9a19cd78SMatthias Ringwald    int nf = LC3_NS(dt, sr_src);
9a19cd78SMatthias Ringwald    int ns = LC3_NS(dt, sr);
9a19cd78SMatthias Ringwald
9a19cd78SMatthias Ringwald    struct lc3_complex buffer[ns/2];
9a19cd78SMatthias Ringwald    struct lc3_complex *z = (struct lc3_complex *)y;
9a19cd78SMatthias Ringwald    union { float *f; struct lc3_complex *z; } u = { .z = buffer };
9a19cd78SMatthias Ringwald
9a19cd78SMatthias Ringwald    imdct_pre_fft(rot, x, z);
*4930cef6SMatthias Ringwald    z = fft(z, ns/2, z, u.z);
9a19cd78SMatthias Ringwald    imdct_post_fft(rot, z, u.f, sqrtf(2.f / nf));
9a19cd78SMatthias Ringwald
9a19cd78SMatthias Ringwald    imdct_window(dt, sr, u.f, d, y);
9a19cd78SMatthias Ringwald}