xref: /btstack/3rd-party/lc3-google/src/mdct.c (revision da8e14c5aa3783b6bb7dd63e71572a901bcf168b)
1 /******************************************************************************
2  *
3  *  Copyright 2022 Google LLC
4  *
5  *  Licensed under the Apache License, Version 2.0 (the "License");
6  *  you may not use this file except in compliance with the License.
7  *  You may obtain a copy of the License at:
8  *
9  *  http://www.apache.org/licenses/LICENSE-2.0
10  *
11  *  Unless required by applicable law or agreed to in writing, software
12  *  distributed under the License is distributed on an "AS IS" BASIS,
13  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  *  See the License for the specific language governing permissions and
15  *  limitations under the License.
16  *
17  ******************************************************************************/
18 
19 #include "tables.h"
20 
21 #include "mdct_neon.h"
22 
23 
24 /* ----------------------------------------------------------------------------
25  *  FFT processing
26  * -------------------------------------------------------------------------- */
27 
28 /**
29  * FFT 5 Points
30  * x, y            Input and output coefficients, of size 5xn
31  * n               Number of interleaved transform to perform (n % 2 = 0)
32  */
33 #ifndef fft_5
34 LC3_HOT static inline void fft_5(
35     const struct lc3_complex *x, struct lc3_complex *y, int n)
36 {
37     static const float cos1 =  0.3090169944;  /* cos(-2Pi 1/5) */
38     static const float cos2 = -0.8090169944;  /* cos(-2Pi 2/5) */
39 
40     static const float sin1 = -0.9510565163;  /* sin(-2Pi 1/5) */
41     static const float sin2 = -0.5877852523;  /* sin(-2Pi 2/5) */
42 
43     for (int i = 0; i < n; i++, x++, y+= 5) {
44 
45         struct lc3_complex s14 =
46             { x[1*n].re + x[4*n].re, x[1*n].im + x[4*n].im };
47         struct lc3_complex d14 =
48             { x[1*n].re - x[4*n].re, x[1*n].im - x[4*n].im };
49 
50         struct lc3_complex s23 =
51             { x[2*n].re + x[3*n].re, x[2*n].im + x[3*n].im };
52         struct lc3_complex d23 =
53             { x[2*n].re - x[3*n].re, x[2*n].im - x[3*n].im };
54 
55         y[0].re = x[0].re + s14.re + s23.re;
56 
57         y[0].im = x[0].im + s14.im + s23.im;
58 
59         y[1].re = x[0].re + s14.re * cos1 - d14.im * sin1
60                           + s23.re * cos2 - d23.im * sin2;
61 
62         y[1].im = x[0].im + s14.im * cos1 + d14.re * sin1
63                           + s23.im * cos2 + d23.re * sin2;
64 
65         y[2].re = x[0].re + s14.re * cos2 - d14.im * sin2
66                           + s23.re * cos1 + d23.im * sin1;
67 
68         y[2].im = x[0].im + s14.im * cos2 + d14.re * sin2
69                           + s23.im * cos1 - d23.re * sin1;
70 
71         y[3].re = x[0].re + s14.re * cos2 + d14.im * sin2
72                           + s23.re * cos1 - d23.im * sin1;
73 
74         y[3].im = x[0].im + s14.im * cos2 - d14.re * sin2
75                           + s23.im * cos1 + d23.re * sin1;
76 
77         y[4].re = x[0].re + s14.re * cos1 + d14.im * sin1
78                           + s23.re * cos2 + d23.im * sin2;
79 
80         y[4].im = x[0].im + s14.im * cos1 - d14.re * sin1
81                           + s23.im * cos2 - d23.re * sin2;
82     }
83 }
84 #endif /* fft_5 */
85 
86 /**
87  * FFT Butterfly 3 Points
88  * x, y            Input and output coefficients
89  * twiddles        Twiddles factors, determine size of transform
90  * n               Number of interleaved transforms
91  */
92 #ifndef fft_bf3
93 LC3_HOT static inline void fft_bf3(
94     const struct lc3_fft_bf3_twiddles *twiddles,
95     const struct lc3_complex *x, struct lc3_complex *y, int n)
96 {
97     int n3 = twiddles->n3;
98     const struct lc3_complex (*w0)[2] = twiddles->t;
99     const struct lc3_complex (*w1)[2] = w0 + n3, (*w2)[2] = w1 + n3;
100 
101     const struct lc3_complex *x0 = x, *x1 = x0 + n*n3, *x2 = x1 + n*n3;
102     struct lc3_complex *y0 = y, *y1 = y0 + n3, *y2 = y1 + n3;
103 
104     for (int i = 0; i < n; i++, y0 += 3*n3, y1 += 3*n3, y2 += 3*n3)
105         for (int j = 0; j < n3; j++, x0++, x1++, x2++) {
106 
107             y0[j].re = x0->re + x1->re * w0[j][0].re - x1->im * w0[j][0].im
108                               + x2->re * w0[j][1].re - x2->im * w0[j][1].im;
109 
110             y0[j].im = x0->im + x1->im * w0[j][0].re + x1->re * w0[j][0].im
111                               + x2->im * w0[j][1].re + x2->re * w0[j][1].im;
112 
113             y1[j].re = x0->re + x1->re * w1[j][0].re - x1->im * w1[j][0].im
114                               + x2->re * w1[j][1].re - x2->im * w1[j][1].im;
115 
116             y1[j].im = x0->im + x1->im * w1[j][0].re + x1->re * w1[j][0].im
117                               + x2->im * w1[j][1].re + x2->re * w1[j][1].im;
118 
119             y2[j].re = x0->re + x1->re * w2[j][0].re - x1->im * w2[j][0].im
120                               + x2->re * w2[j][1].re - x2->im * w2[j][1].im;
121 
122             y2[j].im = x0->im + x1->im * w2[j][0].re + x1->re * w2[j][0].im
123                               + x2->im * w2[j][1].re + x2->re * w2[j][1].im;
124         }
125 }
126 #endif /* fft_bf3 */
127 
128 /**
129  * FFT Butterfly 2 Points
130  * twiddles        Twiddles factors, determine size of transform
131  * x, y            Input and output coefficients
132  * n               Number of interleaved transforms
133  */
134 #ifndef fft_bf2
135 LC3_HOT static inline void fft_bf2(
136     const struct lc3_fft_bf2_twiddles *twiddles,
137     const struct lc3_complex *x, struct lc3_complex *y, int n)
138 {
139     int n2 = twiddles->n2;
140     const struct lc3_complex *w = twiddles->t;
141 
142     const struct lc3_complex *x0 = x, *x1 = x0 + n*n2;
143     struct lc3_complex *y0 = y, *y1 = y0 + n2;
144 
145     for (int i = 0; i < n; i++, y0 += 2*n2, y1 += 2*n2) {
146 
147         for (int j = 0; j < n2; j++, x0++, x1++) {
148 
149             y0[j].re = x0->re + x1->re * w[j].re - x1->im * w[j].im;
150             y0[j].im = x0->im + x1->im * w[j].re + x1->re * w[j].im;
151 
152             y1[j].re = x0->re - x1->re * w[j].re + x1->im * w[j].im;
153             y1[j].im = x0->im - x1->im * w[j].re - x1->re * w[j].im;
154         }
155     }
156 }
157 #endif /* fft_bf2 */
158 
159 /**
160  * Perform FFT
161  * x, y0, y1       Input, and 2 scratch buffers of size `n`
162  * n               Number of points 30, 40, 60, 80, 90, 120, 160, 180, 240
163  * return          The buffer `y0` or `y1` that hold the result
164  *
165  * Input `x` can be the same as the `y0` second scratch buffer
166  */
167 static struct lc3_complex *fft(const struct lc3_complex *x, int n,
168     struct lc3_complex *y0, struct lc3_complex *y1)
169 {
170     struct lc3_complex *y[2] = { y1, y0 };
171     int i2, i3, is = 0;
172 
173     /* The number of points `n` can be decomposed as :
174      *
175      *   n = 5^1 * 3^n3 * 2^n2
176      *
177      *   for n = 40, 80, 160        n3 = 0, n2 = [3..5]
178      *       n = 30, 60, 120, 240   n3 = 1, n2 = [1..4]
179      *       n = 90, 180            n3 = 2, n2 = [1..2]
180      *
181      * Note that the expression `n & (n-1) == 0` is equivalent
182      * to the check that `n` is a power of 2. */
183 
184     fft_5(x, y[is], n /= 5);
185 
186     for (i3 = 0; n & (n-1); i3++, is ^= 1)
187         fft_bf3(lc3_fft_twiddles_bf3[i3], y[is], y[is ^ 1], n /= 3);
188 
189     for (i2 = 0; n > 1; i2++, is ^= 1)
190         fft_bf2(lc3_fft_twiddles_bf2[i2][i3], y[is], y[is ^ 1], n >>= 1);
191 
192     return y[is];
193 }
194 
195 
196 /* ----------------------------------------------------------------------------
197  *  MDCT processing
198  * -------------------------------------------------------------------------- */
199 
200 /**
201  * Windowing of samples before MDCT
202  * dt, sr          Duration and samplerate (size of the transform)
203  * x, y            Input current and delayed samples
204  * y, d            Output windowed samples, and delayed ones
205  */
206 LC3_HOT static void mdct_window(enum lc3_dt dt, enum lc3_srate sr,
207     const float *x, float *d, float *y)
208 {
209     int ns = LC3_NS(dt, sr), nd = LC3_ND(dt, sr);
210 
211     const float *w0 = lc3_mdct_win[dt][sr], *w1 = w0 + ns;
212     const float *w2 = w1, *w3 = w2 + nd;
213 
214     const float *x0 = x + ns-nd, *x1 = x0;
215     float *y0 = y + ns/2, *y1 = y0;
216     float *d0 = d, *d1 = d + nd;
217 
218     while (x1 > x) {
219         *(--y0) = *d0 * *(w0++) - *(--x1) * *(--w1);
220         *(y1++) = (*(d0++) = *(x0++)) * *(w2++);
221 
222         *(--y0) = *d0 * *(w0++) - *(--x1) * *(--w1);
223         *(y1++) = (*(d0++) = *(x0++)) * *(w2++);
224     }
225 
226     for (x1 += ns; x0 < x1; ) {
227         *(--y0) = *d0 * *(w0++) - *(--d1) * *(--w1);
228         *(y1++) = (*(d0++) = *(x0++)) * *(w2++) + (*d1 = *(--x1)) * *(--w3);
229 
230         *(--y0) = *d0 * *(w0++) - *(--d1) * *(--w1);
231         *(y1++) = (*(d0++) = *(x0++)) * *(w2++) + (*d1 = *(--x1)) * *(--w3);
232     }
233 }
234 
235 /**
236  * Pre-rotate MDCT coefficients of N/2 points, before FFT N/4 points FFT
237  * def             Size and twiddles factors
238  * x, y            Input and output coefficients
239  *
240  * `x` and y` can be the same buffer
241  */
242 LC3_HOT static void mdct_pre_fft(const struct lc3_mdct_rot_def *def,
243     const float *x, struct lc3_complex *y)
244 {
245     int n4 = def->n4;
246 
247     const float *x0 = x, *x1 = x0 + 2*n4;
248     const struct lc3_complex *w0 = def->w, *w1 = w0 + n4;
249     struct lc3_complex *y0 = y, *y1 = y0 + n4;
250 
251     while (x0 < x1) {
252         struct lc3_complex u, uw = *(w0++);
253         u.re = - *(--x1) * uw.re + *x0 * uw.im;
254         u.im =   *(x0++) * uw.re + *x1 * uw.im;
255 
256         struct lc3_complex v, vw = *(--w1);
257         v.re = - *(--x1) * vw.im + *x0 * vw.re;
258         v.im = - *(x0++) * vw.im - *x1 * vw.re;
259 
260         *(y0++) = u;
261         *(--y1) = v;
262     }
263 }
264 
265 /**
266  * Post-rotate FFT N/4 points coefficients, resulting MDCT N points
267  * def             Size and twiddles factors
268  * x, y            Input and output coefficients
269  * scale           Scale on output coefficients
270  *
271  * `x` and y` can be the same buffer
272  */
273 LC3_HOT static void mdct_post_fft(const struct lc3_mdct_rot_def *def,
274     const struct lc3_complex *x, float *y, float scale)
275 {
276     int n4 = def->n4, n8 = n4 >> 1;
277 
278     const struct lc3_complex *w0 = def->w + n8, *w1 = w0 - 1;
279     const struct lc3_complex *x0 = x + n8, *x1 = x0 - 1;
280 
281     float *y0 = y + n4, *y1 = y0;
282 
283     for ( ; y1 > y; x0++, x1--, w0++, w1--) {
284 
285         float u0 = (x0->im * w0->im + x0->re * w0->re) * scale;
286         float u1 = (x1->re * w1->im - x1->im * w1->re) * scale;
287 
288         float v0 = (x0->re * w0->im - x0->im * w0->re) * scale;
289         float v1 = (x1->im * w1->im + x1->re * w1->re) * scale;
290 
291         *(y0++) = u0;  *(y0++) = u1;
292         *(--y1) = v0;  *(--y1) = v1;
293     }
294 }
295 
296 /**
297  * Pre-rotate IMDCT coefficients of N points, before FFT N/4 points FFT
298  * def             Size and twiddles factors
299  * x, y            Input and output coefficients
300  *
301  * `x` and `y` can be the same buffer
302  * The real and imaginary parts of `y` are swapped,
303  * to operate on FFT instead of IFFT
304  */
305 LC3_HOT static void imdct_pre_fft(const struct lc3_mdct_rot_def *def,
306     const float *x, struct lc3_complex *y)
307 {
308     int n4 = def->n4;
309 
310     const float *x0 = x, *x1 = x0 + 2*n4;
311 
312     const struct lc3_complex *w0 = def->w, *w1 = w0 + n4;
313     struct lc3_complex *y0 = y, *y1 = y0 + n4;
314 
315     while (x0 < x1) {
316         float u0 = *(x0++), u1 = *(--x1);
317         float v0 = *(x0++), v1 = *(--x1);
318         struct lc3_complex uw = *(w0++), vw = *(--w1);
319 
320         (y0  )->re = - u0 * uw.re - u1 * uw.im;
321         (y0++)->im = - u1 * uw.re + u0 * uw.im;
322 
323         (--y1)->re = - v1 * vw.re - v0 * vw.im;
324         (  y1)->im = - v0 * vw.re + v1 * vw.im;
325     }
326 }
327 
328 /**
329  * Post-rotate FFT N/4 points coefficients, resulting IMDCT N points
330  * def             Size and twiddles factors
331  * x, y            Input and output coefficients
332  * scale           Scale on output coefficients
333  *
334  * `x` and y` can be the same buffer
335  * The real and imaginary parts of `x` are swapped,
336  * to operate on FFT instead of IFFT
337  */
338 LC3_HOT static void imdct_post_fft(const struct lc3_mdct_rot_def *def,
339     const struct lc3_complex *x, float *y, float scale)
340 {
341     int n4 = def->n4;
342 
343     const struct lc3_complex *w0 = def->w, *w1 = w0 + n4;
344     const struct lc3_complex *x0 = x, *x1 = x0 + n4;
345 
346     float *y0 = y, *y1 = y0 + 2*n4;
347 
348     while (x0 < x1) {
349         struct lc3_complex uz = *(x0++), vz = *(--x1);
350         struct lc3_complex uw = *(w0++), vw = *(--w1);
351 
352         *(y0++) = (uz.re * uw.im - uz.im * uw.re) * scale;
353         *(--y1) = (uz.re * uw.re + uz.im * uw.im) * scale;
354 
355         *(--y1) = (vz.re * vw.im - vz.im * vw.re) * scale;
356         *(y0++) = (vz.re * vw.re + vz.im * vw.im) * scale;
357     }
358 }
359 
360 /**
361  * Apply windowing of samples
362  * dt, sr          Duration and samplerate
363  * x, d            Middle half of IMDCT coefficients and delayed samples
364  * y, d            Output samples and delayed ones
365  */
366 LC3_HOT static void imdct_window(enum lc3_dt dt, enum lc3_srate sr,
367     const float *x, float *d, float *y)
368 {
369     /* The full MDCT coefficients is given by symmetry :
370      *   T[   0 ..  n/4-1] = -half[n/4-1 .. 0    ]
371      *   T[ n/4 ..  n/2-1] =  half[0     .. n/4-1]
372      *   T[ n/2 .. 3n/4-1] =  half[n/4   .. n/2-1]
373      *   T[3n/4 ..    n-1] =  half[n/2-1 .. n/4  ]  */
374 
375     int n4 = LC3_NS(dt, sr) >> 1, nd = LC3_ND(dt, sr);
376     const float *w2 = lc3_mdct_win[dt][sr], *w0 = w2 + 3*n4, *w1 = w0;
377 
378     const float *x0 = d + nd-n4, *x1 = x0;
379     float *y0 = y + nd-n4, *y1 = y0, *y2 = d + nd, *y3 = d;
380 
381     while (y0 > y) {
382         *(--y0) = *(--x0) - *(x  ) * *(w1++);
383         *(y1++) = *(x1++) + *(x++) * *(--w0);
384 
385         *(--y0) = *(--x0) - *(x  ) * *(w1++);
386         *(y1++) = *(x1++) + *(x++) * *(--w0);
387     }
388 
389     while (y1 < y + nd) {
390         *(y1++) = *(x1++) + *(x++) * *(--w0);
391         *(y1++) = *(x1++) + *(x++) * *(--w0);
392     }
393 
394     while (y1 < y + 2*n4) {
395         *(y1++) = *(x  ) * *(--w0);
396         *(--y2) = *(x++) * *(w2++);
397 
398         *(y1++) = *(x  ) * *(--w0);
399         *(--y2) = *(x++) * *(w2++);
400     }
401 
402     while (y2 > y3) {
403         *(y3++) = *(x  ) * *(--w0);
404         *(--y2) = *(x++) * *(w2++);
405 
406         *(y3++) = *(x  ) * *(--w0);
407         *(--y2) = *(x++) * *(w2++);
408     }
409 }
410 
411 /**
412  * Forward MDCT transformation
413  */
414 void lc3_mdct_forward(enum lc3_dt dt, enum lc3_srate sr,
415     enum lc3_srate sr_dst, const float *x, float *d, float *y)
416 {
417     const struct lc3_mdct_rot_def *rot = lc3_mdct_rot[dt][sr];
418     int nf = LC3_NS(dt, sr_dst);
419     int ns = LC3_NS(dt, sr);
420 
421     struct lc3_complex buffer[ns/2];
422     struct lc3_complex *z = (struct lc3_complex *)y;
423     union { float *f; struct lc3_complex *z; } u = { .z = buffer };
424 
425     mdct_window(dt, sr, x, d, u.f);
426 
427     mdct_pre_fft(rot, u.f, u.z);
428     u.z = fft(u.z, ns/2, u.z, z);
429     mdct_post_fft(rot, u.z, y, sqrtf( (2.f*nf) / (ns*ns) ));
430 }
431 
432 /**
433  * Inverse MDCT transformation
434  */
435 void lc3_mdct_inverse(enum lc3_dt dt, enum lc3_srate sr,
436     enum lc3_srate sr_src, const float *x, float *d, float *y)
437 {
438     const struct lc3_mdct_rot_def *rot = lc3_mdct_rot[dt][sr];
439     int nf = LC3_NS(dt, sr_src);
440     int ns = LC3_NS(dt, sr);
441 
442     struct lc3_complex buffer[ns/2];
443     struct lc3_complex *z = (struct lc3_complex *)y;
444     union { float *f; struct lc3_complex *z; } u = { .z = buffer };
445 
446     imdct_pre_fft(rot, x, z);
447     z = fft(z, ns/2, z, u.z);
448     imdct_post_fft(rot, z, u.f, sqrtf(2.f / nf));
449 
450     imdct_window(dt, sr, u.f, d, y);
451 }
452