1 /****************************************************************************** 2 * 3 * Copyright 2021 Google, Inc. 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at: 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 * 17 ******************************************************************************/ 18 19 #include "tables.h" 20 21 22 /* ---------------------------------------------------------------------------- 23 * FFT processing 24 * -------------------------------------------------------------------------- */ 25 26 /** 27 * FFT 5 Points template 28 * s -1: Forward 1: Inverse 29 * x, y Input and output coefficients, of size 5xn 30 * n Number of interleaved transform to perform 31 */ 32 static inline void xfft_5(const float s, 33 const struct lc3_complex *x, struct lc3_complex *y, int n) 34 { 35 static const float cos1 = 0.3090169944; /* cos(-2Pi 1/5) */ 36 static const float cos2 = -0.8090169944; /* cos(-2Pi 2/5) */ 37 38 static const float sin1 = -0.9510565163; /* sin(-2Pi 1/5) */ 39 static const float sin2 = -0.5877852523; /* sin(-2Pi 2/5) */ 40 41 for (int i = 0; i < n; i++, x++, y+= 5) { 42 43 struct lc3_complex s14 = 44 { x[1*n].re + x[4*n].re, x[1*n].im + x[4*n].im }; 45 struct lc3_complex d14 = 46 { x[1*n].re - x[4*n].re, x[1*n].im - x[4*n].im }; 47 48 struct lc3_complex s23 = 49 { x[2*n].re + x[3*n].re, x[2*n].im + x[3*n].im }; 50 struct lc3_complex d23 = 51 { x[2*n].re - x[3*n].re, x[2*n].im - x[3*n].im }; 52 53 y[0].re = x[0].re + s14.re + s23.re; 54 y[0].im = x[0].im + s14.im + s23.im; 55 56 y[1].re = x[0].re + s14.re * cos1 + s * d14.im * sin1 57 + s23.re * cos2 + s * d23.im * sin2; 58 59 y[1].im = x[0].im + s14.im * cos1 - s * d14.re * sin1 60 + s23.im * cos2 - s * d23.re * sin2; 61 62 y[2].re = x[0].re + s14.re * cos2 + s * d14.im * sin2 63 + s23.re * cos1 - s * d23.im * sin1; 64 65 y[2].im = x[0].im + s14.im * cos2 - s * d14.re * sin2 66 + s23.im * cos1 + s * d23.re * sin1; 67 68 y[3].re = x[0].re + s14.re * cos2 - s * d14.im * sin2 69 + s23.re * cos1 + s * d23.im * sin1; 70 71 y[3].im = x[0].im + s14.im * cos2 + s * d14.re * sin2 72 + s23.im * cos1 - s * d23.re * sin1; 73 74 y[4].re = x[0].re + s14.re * cos1 - s * d14.im * sin1 75 + s23.re * cos2 - s * d23.im * sin2; 76 77 y[4].im = x[0].im + s14.im * cos1 + s * d14.re * sin1 78 + s23.im * cos2 + s * d23.re * sin2; 79 } 80 } 81 82 /** 83 * FFT Butterfly 3 Points template 84 * s -1: Forward 1: Inverse 85 * x, y Input and output coefficients 86 * twiddles Twiddles factors, determine size of transform 87 * n Number of interleaved transforms 88 */ 89 static inline void xfft_bf3( 90 const float s, const struct lc3_fft_bf3_twiddles *twiddles, 91 const struct lc3_complex *x, struct lc3_complex *y, int n) 92 { 93 int n3 = twiddles->n3; 94 const struct lc3_complex (*w0)[2] = twiddles->t; 95 const struct lc3_complex (*w1)[2] = w0 + n3, (*w2)[2] = w1 + n3; 96 97 const struct lc3_complex *x0 = x, *x1 = x0 + n*n3, *x2 = x1 + n*n3; 98 struct lc3_complex *y0 = y, *y1 = y0 + n3, *y2 = y1 + n3; 99 100 for (int i = 0; i < n; i++, y0 += 3*n3, y1 += 3*n3, y2 += 3*n3) { 101 102 for (int j = 0; j < n3; j++, x0++, x1++, x2++) { 103 104 y0[j].re = x0->re + x1->re * w0[j][0].re + s * x1->im * w0[j][0].im 105 + x2->re * w0[j][1].re + s * x2->im * w0[j][1].im; 106 107 y0[j].im = x0->im + x1->im * w0[j][0].re - s * x1->re * w0[j][0].im 108 + x2->im * w0[j][1].re - s * x2->re * w0[j][1].im; 109 110 y1[j].re = x0->re + x1->re * w1[j][0].re + s * x1->im * w1[j][0].im 111 + x2->re * w1[j][1].re + s * x2->im * w1[j][1].im; 112 113 y1[j].im = x0->im + x1->im * w1[j][0].re - s * x1->re * w1[j][0].im 114 + x2->im * w1[j][1].re - s * x2->re * w1[j][1].im; 115 116 y2[j].re = x0->re + x1->re * w2[j][0].re + s * x1->im * w2[j][0].im 117 + x2->re * w2[j][1].re + s * x2->im * w2[j][1].im; 118 119 y2[j].im = x0->im + x1->im * w2[j][0].re - s * x1->re * w2[j][0].im 120 + x2->im * w2[j][1].re - s * x2->re * w2[j][1].im; 121 } 122 } 123 } 124 125 /** 126 * FFT Butterfly 2 Points template 127 * s -1: Forward 1: Inverse 128 * twiddles Twiddles factors, determine size of transform 129 * x, y Input and output coefficients 130 * n Number of interleaved transforms 131 */ 132 static inline void xfft_bf2( 133 const float s, const struct lc3_fft_bf2_twiddles *twiddles, 134 const struct lc3_complex *x, struct lc3_complex *y, int n) 135 { 136 int n2 = twiddles->n2; 137 const struct lc3_complex *w = twiddles->t; 138 139 const struct lc3_complex *x0 = x, *x1 = x0 + n*n2; 140 struct lc3_complex *y0 = y, *y1 = y0 + n2; 141 142 for (int i = 0; i < n; i++, y0 += 2*n2, y1 += 2*n2) { 143 144 for (int j = 0; j < n2; j++, x0++, x1++) { 145 146 y0[j].re = x0->re + x1->re * w[j].re + s * x1->im * w[j].im; 147 y0[j].im = x0->im + x1->im * w[j].re - s * x1->re * w[j].im; 148 149 y1[j].re = x0->re - x1->re * w[j].re - s * x1->im * w[j].im; 150 y1[j].im = x0->im - x1->im * w[j].re + s * x1->re * w[j].im; 151 } 152 } 153 } 154 155 /** 156 * Forward FFT 5 Points 157 * x, y Input and output coefficients, of size 5xn 158 * n Number of interleaved transform to perform 159 */ 160 static void ffft_5(const struct lc3_complex *x, struct lc3_complex *y, int n) 161 { 162 xfft_5(-1, x, y, n); 163 } 164 165 /** 166 * Inverse FFT 5 Points 167 * x, y Input and output coefficients, of size 5xn 168 * n Number of interleaved transform to perform 169 */ 170 static void ifft_5(const struct lc3_complex *x, struct lc3_complex *y, int n) 171 { 172 xfft_5(1, x, y, n); 173 } 174 175 /** 176 * Forward FFT Butterfly 3 Points 177 * twiddles Twiddles factors, determine size of transform 178 * x, y Input and output coefficients 179 * n Number of interleaved transforms 180 */ 181 static void ffft_bf3(const struct lc3_fft_bf3_twiddles *twiddles, 182 const struct lc3_complex *x, struct lc3_complex *y, int n) 183 { 184 xfft_bf3(-1, twiddles, x, y, n); 185 } 186 187 /** 188 * Inverse FFT Butterfly 3 Points 189 * twiddles Twiddles factors, determine size of transform 190 * x, y Input and output coefficients 191 * n Number of interleaved transforms 192 */ 193 static void ifft_bf3(const struct lc3_fft_bf3_twiddles *twiddles, 194 const struct lc3_complex *x, struct lc3_complex *y, int n) 195 { 196 xfft_bf3(1, twiddles, x, y, n); 197 } 198 199 /** 200 * Forward FFT Butterfly 2 Points 201 * twiddles Twiddles factors, determine size of transform 202 * x, y Input and output coefficients 203 * n Number of interleaved transforms 204 */ 205 static void ffft_bf2(const struct lc3_fft_bf2_twiddles *twiddles, 206 const struct lc3_complex *x, struct lc3_complex *y, int n) 207 { 208 xfft_bf2(-1, twiddles, x, y, n); 209 } 210 211 /** 212 * InverseIFFT Butterfly 2 Points 213 * twiddles Twiddles factors, determine size of transform 214 * x, y Input and output coefficients 215 * n Number of interleaved transforms 216 */ 217 static void ifft_bf2(const struct lc3_fft_bf2_twiddles *twiddles, 218 const struct lc3_complex *x, struct lc3_complex *y, int n) 219 { 220 xfft_bf2(1, twiddles, x, y, n); 221 } 222 223 /** 224 * Perform FFT 225 * inverse True on inverse transform else forward 226 * x, y0, y1 Input, and 2 scratch buffers of size `n` 227 * n Number of points 30, 40, 60, 80, 90, 120, 160, 180, 240 228 * return The buffer `y0` or `y1` that hold the result 229 * 230 * Input `x` can be the same as the `y0` second scratch buffer 231 */ 232 static struct lc3_complex *fft( 233 bool inverse, const struct lc3_complex *x, int n, 234 struct lc3_complex *y0, struct lc3_complex *y1) 235 { 236 struct lc3_complex *y[2] = { y1, y0 }; 237 int i2, i3, is = 0; 238 239 /* The number of points `n` can be decomposed as : 240 * 241 * n = 5^1 * 3^n3 * 2^n2 242 * 243 * for n = 40, 80, 160 n3 = 0, n2 = [3..5] 244 * n = 30, 60, 120, 240 n3 = 1, n2 = [1..4] 245 * n = 90, 180 n3 = 2, n2 = [1..2] 246 * 247 * Note that the expression `n & (n-1) == 0` is equivalent 248 * to the check that `n` is a power of 2. */ 249 250 (inverse ? ifft_5 : ffft_5)(x, y[is], n /= 5); 251 252 for (i3 = 0; n & (n-1); i3++, is ^= 1) 253 (inverse ? ifft_bf3 : ffft_bf3) 254 (lc3_fft_twiddles_bf3[i3], y[is], y[is ^ 1], n /= 3); 255 256 for (i2 = 0; n > 1; i2++, is ^= 1) 257 (inverse ? ifft_bf2 : ffft_bf2) 258 (lc3_fft_twiddles_bf2[i2][i3], y[is], y[is ^ 1], n >>= 1); 259 260 return y[is]; 261 } 262 263 264 /* ---------------------------------------------------------------------------- 265 * MDCT processing 266 * -------------------------------------------------------------------------- */ 267 268 /** 269 * Windowing of samples before MDCT 270 * dt, sr Duration and samplerate (size of the transform) 271 * x [-nd..-1] Previous, [0..ns-1] Current samples 272 * y Output `ns` windowed samples 273 * 274 * The number of previous samples `nd` accessed on `x` is : 275 * nd: `ns` * 23/30 for 7.5ms frame duration 276 * nd: `ns` * 5/ 8 for 10ms frame duration 277 */ 278 static void mdct_window( 279 enum lc3_dt dt, enum lc3_srate sr, const float *x, float *y) 280 { 281 int ns = LC3_NS(dt, sr), nd = LC3_ND(dt, sr); 282 283 const float *w0 = lc3_mdct_win[dt][sr], *w1 = w0 + ns; 284 const float *w2 = w1, *w3 = w2 + nd; 285 286 const float *x0 = x - nd, *x1 = x0 + ns; 287 const float *x2 = x1, *x3 = x2 + nd; 288 289 float *y0 = y + ns/2, *y1 = y0; 290 291 while (x0 < x1) 292 *(--y0) = *(x0++) * *(w0++) - *(--x1) * *(--w1); 293 294 for (const float *xe = x2 + ns-nd; x2 < xe; ) 295 *(y1++) = *(x2++) * *(w2++); 296 297 while (x2 < x3) 298 *(y1++) = *(x2++) * *(w2++) + *(--x3) * *(--w3); 299 } 300 301 /** 302 * Pre-rotate MDCT coefficients of N/2 points, before FFT N/4 points FFT 303 * def Size and twiddles factors 304 * x, y Input and output coefficients 305 * 306 * `x` and y` can be the same buffer 307 */ 308 static void mdct_pre_fft(const struct lc3_mdct_rot_def *def, 309 const float *x, struct lc3_complex *y) 310 { 311 int n4 = def->n4; 312 313 const float *x0 = x, *x1 = x0 + 2*n4; 314 const struct lc3_complex *w0 = def->w, *w1 = w0 + n4; 315 struct lc3_complex *y0 = y, *y1 = y0 + n4; 316 317 while (x0 < x1) { 318 struct lc3_complex u, uw = *(w0++); 319 u.re = - *(--x1) * uw.re + *x0 * uw.im; 320 u.im = *(x0++) * uw.re + *x1 * uw.im; 321 322 struct lc3_complex v, vw = *(--w1); 323 v.re = - *(--x1) * vw.im + *x0 * vw.re; 324 v.im = - *(x0++) * vw.im - *x1 * vw.re; 325 326 *(y0++) = u; 327 *(--y1) = v; 328 } 329 } 330 331 /** 332 * Post-rotate FFT N/4 points coefficients, resulting MDCT N points 333 * def Size and twiddles factors 334 * x, y Input and output coefficients 335 * scale Scale on output coefficients 336 * 337 * `x` and y` can be the same buffer 338 */ 339 static void mdct_post_fft(const struct lc3_mdct_rot_def *def, 340 const struct lc3_complex *x, float *y, float scale) 341 { 342 int n4 = def->n4, n8 = n4 >> 1; 343 344 const struct lc3_complex *w0 = def->w + n8, *w1 = w0 - 1; 345 const struct lc3_complex *x0 = x + n8, *x1 = x0 - 1; 346 347 float *y0 = y + n4, *y1 = y0; 348 349 for ( ; y1 > y; x0++, x1--, w0++, w1--) { 350 351 float u0 = (x0->im * w0->im + x0->re * w0->re) * scale; 352 float u1 = (x1->re * w1->im - x1->im * w1->re) * scale; 353 354 float v0 = (x0->re * w0->im - x0->im * w0->re) * scale; 355 float v1 = (x1->im * w1->im + x1->re * w1->re) * scale; 356 357 *(y0++) = u0; *(y0++) = u1; 358 *(--y1) = v0; *(--y1) = v1; 359 } 360 } 361 362 /** 363 * Pre-rotate IMDCT coefficients of N points, before FFT N/4 points FFT 364 * def Size and twiddles factors 365 * x, y Input and output coefficients 366 * 367 * `x` and y` can be the same buffer 368 */ 369 static void imdct_pre_fft(const struct lc3_mdct_rot_def *def, 370 const float *x, struct lc3_complex *y) 371 { 372 int n4 = def->n4; 373 374 const float *x0 = x, *x1 = x0 + 2*n4; 375 376 const struct lc3_complex *w0 = def->w, *w1 = w0 + n4; 377 struct lc3_complex *y0 = y, *y1 = y0 + n4; 378 379 while (x0 < x1) { 380 float u0 = *(x0++), u1 = *(--x1); 381 float v0 = *(x0++), v1 = *(--x1); 382 struct lc3_complex uw = *(w0++), vw = *(--w1); 383 384 (y0 )->re = - u1 * uw.re + u0 * uw.im; 385 (y0++)->im = - u0 * uw.re - u1 * uw.im; 386 387 (--y1)->re = - v0 * vw.re + v1 * vw.im; 388 ( y1)->im = - v1 * vw.re - v0 * vw.im; 389 } 390 } 391 392 /** 393 * Post-rotate FFT N/4 points coefficients, resulting IMDCT N points 394 * def Size and twiddles factors 395 * x, y Input and output coefficients 396 * scale Scale on output coefficients 397 * 398 * `x` and y` can be the same buffer 399 */ 400 static void imdct_post_fft(const struct lc3_mdct_rot_def *def, 401 const struct lc3_complex *x, float *y, float scale) 402 { 403 int n4 = def->n4; 404 405 const struct lc3_complex *w0 = def->w, *w1 = w0 + n4; 406 const struct lc3_complex *x0 = x, *x1 = x0 + n4; 407 408 float *y0 = y, *y1 = y0 + 2*n4; 409 410 while (x0 < x1) { 411 struct lc3_complex uz = *(x0++), vz = *(--x1); 412 struct lc3_complex uw = *(w0++), vw = *(--w1); 413 414 *(y0++) = (uz.im * uw.im - uz.re * uw.re) * scale; 415 *(--y1) = (uz.im * uw.re + uz.re * uw.im) * scale; 416 417 *(--y1) = (vz.im * vw.im - vz.re * vw.re) * scale; 418 *(y0++) = (vz.im * vw.re + vz.re * vw.im) * scale; 419 } 420 } 421 422 /** 423 * Apply windowing of samples 424 * dt, sr Duration and samplerate 425 * x, d Middle half of IMDCT coefficients and delayed samples 426 * y, d Output samples and delayed ones 427 */ 428 static void imdct_window(enum lc3_dt dt, enum lc3_srate sr, 429 const float *x, float *d, float *y) 430 { 431 /* The full MDCT coefficients is given by symmetry : 432 * T[ 0 .. n/4-1] = -half[n/4-1 .. 0 ] 433 * T[ n/4 .. n/2-1] = half[0 .. n/4-1] 434 * T[ n/2 .. 3n/4-1] = half[n/4 .. n/2-1] 435 * T[3n/4 .. n-1] = half[n/2-1 .. n/4 ] */ 436 437 int n4 = LC3_NS(dt, sr) >> 1, nd = LC3_ND(dt, sr); 438 const float *w2 = lc3_mdct_win[dt][sr], *w0 = w2 + 3*n4, *w1 = w0; 439 440 const float *x0 = d + nd-n4, *x1 = x0; 441 float *y0 = y + nd-n4, *y1 = y0, *y2 = d + nd, *y3 = d; 442 443 while (y0 > y) { 444 *(--y0) = *(--x0) - *(x ) * *(w1++); 445 *(y1++) = *(x1++) + *(x++) * *(--w0); 446 } 447 448 while (y1 < y + nd) { 449 *(y1++) = *(x1++) + *(x++) * *(--w0); 450 } 451 452 while (y1 < y + 2*n4) { 453 *(y1++) = *(x ) * *(--w0); 454 *(--y2) = *(x++) * *(w2++); 455 } 456 457 while (y2 > y3) { 458 *(y3++) = *(x ) * *(--w0); 459 *(--y2) = *(x++) * *(w2++); 460 } 461 } 462 463 /** 464 * Forward MDCT transformation 465 */ 466 void lc3_mdct_forward(enum lc3_dt dt, enum lc3_srate sr, 467 enum lc3_srate sr_dst, const float *x, float *y) 468 { 469 const struct lc3_mdct_rot_def *rot = lc3_mdct_rot[dt][sr]; 470 int nf = LC3_NS(dt, sr_dst); 471 int ns = LC3_NS(dt, sr); 472 473 union { float *f; struct lc3_complex *z; } u = { .f = y }; 474 struct lc3_complex z[ns/2]; 475 476 mdct_window(dt, sr, x, u.f); 477 478 mdct_pre_fft(rot, u.f, u.z); 479 u.z = fft(false, u.z, ns/2, u.z, z); 480 mdct_post_fft(rot, u.z, y, sqrtf( (2.f*nf) / (ns*ns) )); 481 } 482 483 /** 484 * Inverse MDCT transformation 485 */ 486 void lc3_mdct_inverse(enum lc3_dt dt, enum lc3_srate sr, 487 enum lc3_srate sr_src, const float *x, float *d, float *y) 488 { 489 const struct lc3_mdct_rot_def *rot = lc3_mdct_rot[dt][sr]; 490 int nf = LC3_NS(dt, sr_src); 491 int ns = LC3_NS(dt, sr); 492 493 struct lc3_complex buffer[ns/2]; 494 struct lc3_complex *z = (struct lc3_complex *)y; 495 union { float *f; struct lc3_complex *z; } u = { .z = buffer }; 496 497 imdct_pre_fft(rot, x, z); 498 z = fft(true, z, ns/2, z, u.z); 499 imdct_post_fft(rot, z, u.f, sqrtf(2.f / nf)); 500 501 imdct_window(dt, sr, u.f, d, y); 502 } 503