xref: /btstack/3rd-party/lc3-google/src/mdct.c (revision dbf84c8a4e5f19f37bf9128db249cb40829b9966)
1 /******************************************************************************
2  *
3  *  Copyright 2021 Google, Inc.
4  *
5  *  Licensed under the Apache License, Version 2.0 (the "License");
6  *  you may not use this file except in compliance with the License.
7  *  You may obtain a copy of the License at:
8  *
9  *  http://www.apache.org/licenses/LICENSE-2.0
10  *
11  *  Unless required by applicable law or agreed to in writing, software
12  *  distributed under the License is distributed on an "AS IS" BASIS,
13  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  *  See the License for the specific language governing permissions and
15  *  limitations under the License.
16  *
17  ******************************************************************************/
18 
19 #include "tables.h"
20 
21 
22 /* ----------------------------------------------------------------------------
23  *  FFT processing
24  * -------------------------------------------------------------------------- */
25 
26 /**
27  * FFT 5 Points template
28  * s               -1: Forward  1: Inverse
29  * x, y            Input and output coefficients, of size 5xn
30  * n               Number of interleaved transform to perform
31  */
32 static inline void xfft_5(const float s,
33     const struct lc3_complex *x, struct lc3_complex *y, int n)
34 {
35     static const float cos1 =  0.3090169944;  /* cos(-2Pi 1/5) */
36     static const float cos2 = -0.8090169944;  /* cos(-2Pi 2/5) */
37 
38     static const float sin1 = -0.9510565163;  /* sin(-2Pi 1/5) */
39     static const float sin2 = -0.5877852523;  /* sin(-2Pi 2/5) */
40 
41     for (int i = 0; i < n; i++, x++, y+= 5) {
42 
43         struct lc3_complex s14 =
44             { x[1*n].re + x[4*n].re, x[1*n].im + x[4*n].im };
45         struct lc3_complex d14 =
46             { x[1*n].re - x[4*n].re, x[1*n].im - x[4*n].im };
47 
48         struct lc3_complex s23 =
49             { x[2*n].re + x[3*n].re, x[2*n].im + x[3*n].im };
50         struct lc3_complex d23 =
51             { x[2*n].re - x[3*n].re, x[2*n].im - x[3*n].im };
52 
53         y[0].re = x[0].re + s14.re + s23.re;
54         y[0].im = x[0].im + s14.im + s23.im;
55 
56         y[1].re = x[0].re + s14.re * cos1 + s * d14.im * sin1
57                           + s23.re * cos2 + s * d23.im * sin2;
58 
59         y[1].im = x[0].im + s14.im * cos1 - s * d14.re * sin1
60                           + s23.im * cos2 - s * d23.re * sin2;
61 
62         y[2].re = x[0].re + s14.re * cos2 + s * d14.im * sin2
63                           + s23.re * cos1 - s * d23.im * sin1;
64 
65         y[2].im = x[0].im + s14.im * cos2 - s * d14.re * sin2
66                           + s23.im * cos1 + s * d23.re * sin1;
67 
68         y[3].re = x[0].re + s14.re * cos2 - s * d14.im * sin2
69                           + s23.re * cos1 + s * d23.im * sin1;
70 
71         y[3].im = x[0].im + s14.im * cos2 + s * d14.re * sin2
72                           + s23.im * cos1 - s * d23.re * sin1;
73 
74         y[4].re = x[0].re + s14.re * cos1 - s * d14.im * sin1
75                           + s23.re * cos2 - s * d23.im * sin2;
76 
77         y[4].im = x[0].im + s14.im * cos1 + s * d14.re * sin1
78                           + s23.im * cos2 + s * d23.re * sin2;
79     }
80 }
81 
82 /**
83  * FFT Butterfly 3 Points template
84  * s               -1: Forward  1: Inverse
85  * x, y            Input and output coefficients
86  * twiddles        Twiddles factors, determine size of transform
87  * n               Number of interleaved transforms
88  */
89 static inline void xfft_bf3(
90     const float s, const struct lc3_fft_bf3_twiddles *twiddles,
91     const struct lc3_complex *x, struct lc3_complex *y, int n)
92 {
93     int n3 = twiddles->n3;
94     const struct lc3_complex (*w0)[2] = twiddles->t;
95     const struct lc3_complex (*w1)[2] = w0 + n3, (*w2)[2] = w1 + n3;
96 
97     const struct lc3_complex *x0 = x, *x1 = x0 + n*n3, *x2 = x1 + n*n3;
98     struct lc3_complex *y0 = y, *y1 = y0 + n3, *y2 = y1 + n3;
99 
100     for (int i = 0; i < n; i++, y0 += 3*n3, y1 += 3*n3, y2 += 3*n3) {
101 
102         for (int j = 0; j < n3; j++, x0++, x1++, x2++) {
103 
104             y0[j].re = x0->re + x1->re * w0[j][0].re + s * x1->im * w0[j][0].im
105                               + x2->re * w0[j][1].re + s * x2->im * w0[j][1].im;
106 
107             y0[j].im = x0->im + x1->im * w0[j][0].re - s * x1->re * w0[j][0].im
108                               + x2->im * w0[j][1].re - s * x2->re * w0[j][1].im;
109 
110             y1[j].re = x0->re + x1->re * w1[j][0].re + s * x1->im * w1[j][0].im
111                               + x2->re * w1[j][1].re + s * x2->im * w1[j][1].im;
112 
113             y1[j].im = x0->im + x1->im * w1[j][0].re - s * x1->re * w1[j][0].im
114                               + x2->im * w1[j][1].re - s * x2->re * w1[j][1].im;
115 
116             y2[j].re = x0->re + x1->re * w2[j][0].re + s * x1->im * w2[j][0].im
117                               + x2->re * w2[j][1].re + s * x2->im * w2[j][1].im;
118 
119             y2[j].im = x0->im + x1->im * w2[j][0].re - s * x1->re * w2[j][0].im
120                               + x2->im * w2[j][1].re - s * x2->re * w2[j][1].im;
121         }
122     }
123 }
124 
125 /**
126  * FFT Butterfly 2 Points template
127  * s               -1: Forward  1: Inverse
128  * twiddles        Twiddles factors, determine size of transform
129  * x, y            Input and output coefficients
130  * n               Number of interleaved transforms
131  */
132 static inline void xfft_bf2(
133     const float s, const struct lc3_fft_bf2_twiddles *twiddles,
134     const struct lc3_complex *x, struct lc3_complex *y, int n)
135 {
136     int n2 = twiddles->n2;
137     const struct lc3_complex *w = twiddles->t;
138 
139     const struct lc3_complex *x0 = x, *x1 = x0 + n*n2;
140     struct lc3_complex *y0 = y, *y1 = y0 + n2;
141 
142     for (int i = 0; i < n; i++, y0 += 2*n2, y1 += 2*n2) {
143 
144         for (int j = 0; j < n2; j++, x0++, x1++) {
145 
146             y0[j].re = x0->re + x1->re * w[j].re + s * x1->im * w[j].im;
147             y0[j].im = x0->im + x1->im * w[j].re - s * x1->re * w[j].im;
148 
149             y1[j].re = x0->re - x1->re * w[j].re - s * x1->im * w[j].im;
150             y1[j].im = x0->im - x1->im * w[j].re + s * x1->re * w[j].im;
151         }
152     }
153 }
154 
155 /**
156  * Forward FFT 5 Points
157  * x, y            Input and output coefficients, of size 5xn
158  * n               Number of interleaved transform to perform
159  */
160 static void ffft_5(const struct lc3_complex *x, struct lc3_complex *y, int n)
161 {
162     xfft_5(-1, x, y, n);
163 }
164 
165 /**
166  * Inverse FFT 5 Points
167  * x, y            Input and output coefficients, of size 5xn
168  * n               Number of interleaved transform to perform
169  */
170 static void ifft_5(const struct lc3_complex *x, struct lc3_complex *y, int n)
171 {
172     xfft_5(1, x, y, n);
173 }
174 
175 /**
176  * Forward FFT Butterfly 3 Points
177  * twiddles        Twiddles factors, determine size of transform
178  * x, y            Input and output coefficients
179  * n               Number of interleaved transforms
180  */
181 static void ffft_bf3(const struct lc3_fft_bf3_twiddles *twiddles,
182     const struct lc3_complex *x, struct lc3_complex *y, int n)
183 {
184     xfft_bf3(-1, twiddles, x, y, n);
185 }
186 
187 /**
188  * Inverse FFT Butterfly 3 Points
189  * twiddles        Twiddles factors, determine size of transform
190  * x, y            Input and output coefficients
191  * n               Number of interleaved transforms
192  */
193 static void ifft_bf3(const struct lc3_fft_bf3_twiddles *twiddles,
194     const struct lc3_complex *x, struct lc3_complex *y, int n)
195 {
196     xfft_bf3(1, twiddles, x, y, n);
197 }
198 
199 /**
200  * Forward FFT Butterfly 2 Points
201  * twiddles        Twiddles factors, determine size of transform
202  * x, y            Input and output coefficients
203  * n               Number of interleaved transforms
204  */
205 static void ffft_bf2(const struct lc3_fft_bf2_twiddles *twiddles,
206     const struct lc3_complex *x, struct lc3_complex *y, int n)
207 {
208     xfft_bf2(-1, twiddles, x, y, n);
209 }
210 
211 /**
212  * InverseIFFT Butterfly 2 Points
213  * twiddles        Twiddles factors, determine size of transform
214  * x, y            Input and output coefficients
215  * n               Number of interleaved transforms
216  */
217 static void ifft_bf2(const struct lc3_fft_bf2_twiddles *twiddles,
218     const struct lc3_complex *x, struct lc3_complex *y, int n)
219 {
220     xfft_bf2(1, twiddles, x, y, n);
221 }
222 
223 /**
224  * Perform FFT
225  * inverse         True on inverse transform else forward
226  * x, y0, y1       Input, and 2 scratch buffers of size `n`
227  * n               Number of points 30, 40, 60, 80, 90, 120, 160, 180, 240
228  * return          The buffer `y0` or `y1` that hold the result
229  *
230  * Input `x` can be the same as the `y0` second scratch buffer
231  */
232 static struct lc3_complex *fft(
233     bool inverse, const struct lc3_complex *x, int n,
234     struct lc3_complex *y0, struct lc3_complex *y1)
235 {
236     struct lc3_complex *y[2] = { y1, y0 };
237     int i2, i3, is = 0;
238 
239     /* The number of points `n` can be decomposed as :
240      *
241      *   n = 5^1 * 3^n3 * 2^n2
242      *
243      *   for n = 40, 80, 160        n3 = 0, n2 = [3..5]
244      *       n = 30, 60, 120, 240   n3 = 1, n2 = [1..4]
245      *       n = 90, 180            n3 = 2, n2 = [1..2]
246      *
247      * Note that the expression `n & (n-1) == 0` is equivalent
248      * to the check that `n` is a power of 2. */
249 
250     (inverse ? ifft_5 : ffft_5)(x, y[is], n /= 5);
251 
252     for (i3 = 0; n & (n-1); i3++, is ^= 1)
253         (inverse ? ifft_bf3 : ffft_bf3)
254             (lc3_fft_twiddles_bf3[i3], y[is], y[is ^ 1], n /= 3);
255 
256     for (i2 = 0; n > 1; i2++, is ^= 1)
257         (inverse ? ifft_bf2 : ffft_bf2)
258             (lc3_fft_twiddles_bf2[i2][i3], y[is], y[is ^ 1], n >>= 1);
259 
260     return y[is];
261 }
262 
263 
264 /* ----------------------------------------------------------------------------
265  *  MDCT processing
266  * -------------------------------------------------------------------------- */
267 
268 /**
269  * Windowing of samples before MDCT
270  * dt, sr          Duration and samplerate (size of the transform)
271  * x               [-nd..-1] Previous, [0..ns-1] Current samples
272  * y               Output `ns` windowed samples
273  *
274  * The number of previous samples `nd` accessed on `x` is :
275  *   nd: `ns` * 23/30 for 7.5ms frame duration
276  *   nd: `ns` *  5/ 8 for  10ms frame duration
277  */
278 static void mdct_window(
279     enum lc3_dt dt, enum lc3_srate sr, const float *x, float *y)
280 {
281     int ns = LC3_NS(dt, sr), nd = LC3_ND(dt, sr);
282 
283     const float *w0 = lc3_mdct_win[dt][sr], *w1 = w0 + ns;
284     const float *w2 = w1, *w3 = w2 + nd;
285 
286     const float *x0 = x - nd, *x1 = x0 + ns;
287     const float *x2 = x1, *x3 = x2 + nd;
288 
289     float *y0 = y + ns/2, *y1 = y0;
290 
291     while (x0 < x1)
292         *(--y0) = *(x0++) * *(w0++) - *(--x1) * *(--w1);
293 
294     for (const float *xe = x2 + ns-nd; x2 < xe; )
295         *(y1++) = *(x2++) * *(w2++);
296 
297     while (x2 < x3)
298         *(y1++) = *(x2++) * *(w2++) + *(--x3) * *(--w3);
299 }
300 
301 /**
302  * Pre-rotate MDCT coefficients of N/2 points, before FFT N/4 points FFT
303  * def             Size and twiddles factors
304  * x, y            Input and output coefficients
305  *
306  * `x` and y` can be the same buffer
307  */
308 static void mdct_pre_fft(const struct lc3_mdct_rot_def *def,
309     const float *x, struct lc3_complex *y)
310 {
311     int n4 = def->n4;
312 
313     const float *x0 = x, *x1 = x0 + 2*n4;
314     const struct lc3_complex *w0 = def->w, *w1 = w0 + n4;
315     struct lc3_complex *y0 = y, *y1 = y0 + n4;
316 
317     while (x0 < x1) {
318         struct lc3_complex u, uw = *(w0++);
319         u.re = - *(--x1) * uw.re + *x0 * uw.im;
320         u.im =   *(x0++) * uw.re + *x1 * uw.im;
321 
322         struct lc3_complex v, vw = *(--w1);
323         v.re = - *(--x1) * vw.im + *x0 * vw.re;
324         v.im = - *(x0++) * vw.im - *x1 * vw.re;
325 
326         *(y0++) = u;
327         *(--y1) = v;
328     }
329 }
330 
331 /**
332  * Post-rotate FFT N/4 points coefficients, resulting MDCT N points
333  * def             Size and twiddles factors
334  * x, y            Input and output coefficients
335  * scale           Scale on output coefficients
336  *
337  * `x` and y` can be the same buffer
338  */
339 static void mdct_post_fft(const struct lc3_mdct_rot_def *def,
340     const struct lc3_complex *x, float *y, float scale)
341 {
342     int n4 = def->n4, n8 = n4 >> 1;
343 
344     const struct lc3_complex *w0 = def->w + n8, *w1 = w0 - 1;
345     const struct lc3_complex *x0 = x + n8, *x1 = x0 - 1;
346 
347     float *y0 = y + n4, *y1 = y0;
348 
349     for ( ; y1 > y; x0++, x1--, w0++, w1--) {
350 
351         float u0 = (x0->im * w0->im + x0->re * w0->re) * scale;
352         float u1 = (x1->re * w1->im - x1->im * w1->re) * scale;
353 
354         float v0 = (x0->re * w0->im - x0->im * w0->re) * scale;
355         float v1 = (x1->im * w1->im + x1->re * w1->re) * scale;
356 
357         *(y0++) = u0;  *(y0++) = u1;
358         *(--y1) = v0;  *(--y1) = v1;
359     }
360 }
361 
362 /**
363  * Pre-rotate IMDCT coefficients of N points, before FFT N/4 points FFT
364  * def             Size and twiddles factors
365  * x, y            Input and output coefficients
366  *
367  * `x` and y` can be the same buffer
368  */
369 static void imdct_pre_fft(const struct lc3_mdct_rot_def *def,
370     const float *x, struct lc3_complex *y)
371 {
372     int n4 = def->n4;
373 
374     const float *x0 = x, *x1 = x0 + 2*n4;
375 
376     const struct lc3_complex *w0 = def->w, *w1 = w0 + n4;
377     struct lc3_complex *y0 = y, *y1 = y0 + n4;
378 
379     while (x0 < x1) {
380         float u0 = *(x0++), u1 = *(--x1);
381         float v0 = *(x0++), v1 = *(--x1);
382         struct lc3_complex uw = *(w0++), vw = *(--w1);
383 
384         (y0  )->re = - u1 * uw.re + u0 * uw.im;
385         (y0++)->im = - u0 * uw.re - u1 * uw.im;
386 
387         (--y1)->re = - v0 * vw.re + v1 * vw.im;
388         (  y1)->im = - v1 * vw.re - v0 * vw.im;
389     }
390 }
391 
392 /**
393  * Post-rotate FFT N/4 points coefficients, resulting IMDCT N points
394  * def             Size and twiddles factors
395  * x, y            Input and output coefficients
396  * scale           Scale on output coefficients
397  *
398  * `x` and y` can be the same buffer
399  */
400 static void imdct_post_fft(const struct lc3_mdct_rot_def *def,
401     const struct lc3_complex *x, float *y, float scale)
402 {
403     int n4 = def->n4;
404 
405     const struct lc3_complex *w0 = def->w, *w1 = w0 + n4;
406     const struct lc3_complex *x0 = x, *x1 = x0 + n4;
407 
408     float *y0 = y, *y1 = y0 + 2*n4;
409 
410     while (x0 < x1) {
411         struct lc3_complex uz = *(x0++), vz = *(--x1);
412         struct lc3_complex uw = *(w0++), vw = *(--w1);
413 
414         *(y0++) = (uz.im * uw.im - uz.re * uw.re) * scale;
415         *(--y1) = (uz.im * uw.re + uz.re * uw.im) * scale;
416 
417         *(--y1) = (vz.im * vw.im - vz.re * vw.re) * scale;
418         *(y0++) = (vz.im * vw.re + vz.re * vw.im) * scale;
419     }
420 }
421 
422 /**
423  * Apply windowing of samples
424  * dt, sr          Duration and samplerate
425  * x, d            Middle half of IMDCT coefficients and delayed samples
426  * y, d            Output samples and delayed ones
427  */
428 static void imdct_window(enum lc3_dt dt, enum lc3_srate sr,
429     const float *x, float *d, float *y)
430 {
431     /* The full MDCT coefficients is given by symmetry :
432      *   T[   0 ..  n/4-1] = -half[n/4-1 .. 0    ]
433      *   T[ n/4 ..  n/2-1] =  half[0     .. n/4-1]
434      *   T[ n/2 .. 3n/4-1] =  half[n/4   .. n/2-1]
435      *   T[3n/4 ..    n-1] =  half[n/2-1 .. n/4  ]  */
436 
437     int n4 = LC3_NS(dt, sr) >> 1, nd = LC3_ND(dt, sr);
438     const float *w2 = lc3_mdct_win[dt][sr], *w0 = w2 + 3*n4, *w1 = w0;
439 
440     const float *x0 = d + nd-n4, *x1 = x0;
441     float *y0 = y + nd-n4, *y1 = y0, *y2 = d + nd, *y3 = d;
442 
443     while (y0 > y) {
444         *(--y0) = *(--x0) - *(x  ) * *(w1++);
445         *(y1++) = *(x1++) + *(x++) * *(--w0);
446     }
447 
448     while (y1 < y + nd) {
449         *(y1++) = *(x1++) + *(x++) * *(--w0);
450     }
451 
452     while (y1 < y + 2*n4) {
453         *(y1++) = *(x  ) * *(--w0);
454         *(--y2) = *(x++) * *(w2++);
455     }
456 
457     while (y2 > y3) {
458         *(y3++) = *(x  ) * *(--w0);
459         *(--y2) = *(x++) * *(w2++);
460     }
461 }
462 
463 /**
464  * Forward MDCT transformation
465  */
466 void lc3_mdct_forward(enum lc3_dt dt, enum lc3_srate sr,
467     enum lc3_srate sr_dst, const float *x, float *y)
468 {
469     const struct lc3_mdct_rot_def *rot = lc3_mdct_rot[dt][sr];
470     int nf = LC3_NS(dt, sr_dst);
471     int ns = LC3_NS(dt, sr);
472 
473     union { float *f; struct lc3_complex *z; } u = { .f = y };
474     struct lc3_complex z[ns/2];
475 
476     mdct_window(dt, sr, x, u.f);
477 
478     mdct_pre_fft(rot, u.f, u.z);
479     u.z = fft(false, u.z, ns/2, u.z, z);
480     mdct_post_fft(rot, u.z, y, sqrtf( (2.f*nf) / (ns*ns) ));
481 }
482 
483 /**
484  * Inverse MDCT transformation
485  */
486 void lc3_mdct_inverse(enum lc3_dt dt, enum lc3_srate sr,
487     enum lc3_srate sr_src, const float *x, float *d, float *y)
488 {
489     const struct lc3_mdct_rot_def *rot = lc3_mdct_rot[dt][sr];
490     int nf = LC3_NS(dt, sr_src);
491     int ns = LC3_NS(dt, sr);
492 
493     struct lc3_complex buffer[ns/2];
494     struct lc3_complex *z = (struct lc3_complex *)y;
495     union { float *f; struct lc3_complex *z; } u = { .z = buffer };
496 
497     imdct_pre_fft(rot, x, z);
498     z = fft(true, z, ns/2, z, u.z);
499     imdct_post_fft(rot, z, u.f, sqrtf(2.f / nf));
500 
501     imdct_window(dt, sr, u.f, d, y);
502 }
503