1 /****************************************************************************** 2 * 3 * Copyright 2022 Google LLC 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at: 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 * 17 ******************************************************************************/ 18 19 #include "mdct.h" 20 #include "tables.h" 21 22 #include "mdct_neon.h" 23 24 25 /* ---------------------------------------------------------------------------- 26 * FFT processing 27 * -------------------------------------------------------------------------- */ 28 29 /** 30 * FFT 5 Points 31 * x, y Input and output coefficients, of size 5xn 32 * n Number of interleaved transform to perform (n % 2 = 0) 33 */ 34 #ifndef fft_5 35 LC3_HOT static inline void fft_5( 36 const struct lc3_complex *x, struct lc3_complex *y, int n) 37 { 38 static const float cos1 = 0.3090169944; /* cos(-2Pi 1/5) */ 39 static const float cos2 = -0.8090169944; /* cos(-2Pi 2/5) */ 40 41 static const float sin1 = -0.9510565163; /* sin(-2Pi 1/5) */ 42 static const float sin2 = -0.5877852523; /* sin(-2Pi 2/5) */ 43 44 for (int i = 0; i < n; i++, x++, y+= 5) { 45 46 struct lc3_complex s14 = 47 { x[1*n].re + x[4*n].re, x[1*n].im + x[4*n].im }; 48 struct lc3_complex d14 = 49 { x[1*n].re - x[4*n].re, x[1*n].im - x[4*n].im }; 50 51 struct lc3_complex s23 = 52 { x[2*n].re + x[3*n].re, x[2*n].im + x[3*n].im }; 53 struct lc3_complex d23 = 54 { x[2*n].re - x[3*n].re, x[2*n].im - x[3*n].im }; 55 56 y[0].re = x[0].re + s14.re + s23.re; 57 58 y[0].im = x[0].im + s14.im + s23.im; 59 60 y[1].re = x[0].re + s14.re * cos1 - d14.im * sin1 61 + s23.re * cos2 - d23.im * sin2; 62 63 y[1].im = x[0].im + s14.im * cos1 + d14.re * sin1 64 + s23.im * cos2 + d23.re * sin2; 65 66 y[2].re = x[0].re + s14.re * cos2 - d14.im * sin2 67 + s23.re * cos1 + d23.im * sin1; 68 69 y[2].im = x[0].im + s14.im * cos2 + d14.re * sin2 70 + s23.im * cos1 - d23.re * sin1; 71 72 y[3].re = x[0].re + s14.re * cos2 + d14.im * sin2 73 + s23.re * cos1 - d23.im * sin1; 74 75 y[3].im = x[0].im + s14.im * cos2 - d14.re * sin2 76 + s23.im * cos1 + d23.re * sin1; 77 78 y[4].re = x[0].re + s14.re * cos1 + d14.im * sin1 79 + s23.re * cos2 + d23.im * sin2; 80 81 y[4].im = x[0].im + s14.im * cos1 - d14.re * sin1 82 + s23.im * cos2 - d23.re * sin2; 83 } 84 } 85 #endif /* fft_5 */ 86 87 /** 88 * FFT Butterfly 3 Points 89 * x, y Input and output coefficients 90 * twiddles Twiddles factors, determine size of transform 91 * n Number of interleaved transforms 92 */ 93 #ifndef fft_bf3 94 LC3_HOT static inline void fft_bf3( 95 const struct lc3_fft_bf3_twiddles *twiddles, 96 const struct lc3_complex *x, struct lc3_complex *y, int n) 97 { 98 int n3 = twiddles->n3; 99 const struct lc3_complex (*w0)[2] = twiddles->t; 100 const struct lc3_complex (*w1)[2] = w0 + n3, (*w2)[2] = w1 + n3; 101 102 const struct lc3_complex *x0 = x, *x1 = x0 + n*n3, *x2 = x1 + n*n3; 103 struct lc3_complex *y0 = y, *y1 = y0 + n3, *y2 = y1 + n3; 104 105 for (int i = 0; i < n; i++, y0 += 3*n3, y1 += 3*n3, y2 += 3*n3) 106 for (int j = 0; j < n3; j++, x0++, x1++, x2++) { 107 108 y0[j].re = x0->re + x1->re * w0[j][0].re - x1->im * w0[j][0].im 109 + x2->re * w0[j][1].re - x2->im * w0[j][1].im; 110 111 y0[j].im = x0->im + x1->im * w0[j][0].re + x1->re * w0[j][0].im 112 + x2->im * w0[j][1].re + x2->re * w0[j][1].im; 113 114 y1[j].re = x0->re + x1->re * w1[j][0].re - x1->im * w1[j][0].im 115 + x2->re * w1[j][1].re - x2->im * w1[j][1].im; 116 117 y1[j].im = x0->im + x1->im * w1[j][0].re + x1->re * w1[j][0].im 118 + x2->im * w1[j][1].re + x2->re * w1[j][1].im; 119 120 y2[j].re = x0->re + x1->re * w2[j][0].re - x1->im * w2[j][0].im 121 + x2->re * w2[j][1].re - x2->im * w2[j][1].im; 122 123 y2[j].im = x0->im + x1->im * w2[j][0].re + x1->re * w2[j][0].im 124 + x2->im * w2[j][1].re + x2->re * w2[j][1].im; 125 } 126 } 127 #endif /* fft_bf3 */ 128 129 /** 130 * FFT Butterfly 2 Points 131 * twiddles Twiddles factors, determine size of transform 132 * x, y Input and output coefficients 133 * n Number of interleaved transforms 134 */ 135 #ifndef fft_bf2 136 LC3_HOT static inline void fft_bf2( 137 const struct lc3_fft_bf2_twiddles *twiddles, 138 const struct lc3_complex *x, struct lc3_complex *y, int n) 139 { 140 int n2 = twiddles->n2; 141 const struct lc3_complex *w = twiddles->t; 142 143 const struct lc3_complex *x0 = x, *x1 = x0 + n*n2; 144 struct lc3_complex *y0 = y, *y1 = y0 + n2; 145 146 for (int i = 0; i < n; i++, y0 += 2*n2, y1 += 2*n2) { 147 148 for (int j = 0; j < n2; j++, x0++, x1++) { 149 150 y0[j].re = x0->re + x1->re * w[j].re - x1->im * w[j].im; 151 y0[j].im = x0->im + x1->im * w[j].re + x1->re * w[j].im; 152 153 y1[j].re = x0->re - x1->re * w[j].re + x1->im * w[j].im; 154 y1[j].im = x0->im - x1->im * w[j].re - x1->re * w[j].im; 155 } 156 } 157 } 158 #endif /* fft_bf2 */ 159 160 /** 161 * Perform FFT 162 * x, y0, y1 Input, and 2 scratch buffers of size `n` 163 * n Number of points 30, 40, 60, 80, 90, 120, 160, 180, 240, 480 164 * return The buffer `y0` or `y1` that hold the result 165 * 166 * Input `x` can be the same as the `y0` second scratch buffer 167 */ 168 static struct lc3_complex *fft(const struct lc3_complex *x, int n, 169 struct lc3_complex *y0, struct lc3_complex *y1) 170 { 171 struct lc3_complex *y[2] = { y1, y0 }; 172 int i2, i3, is = 0; 173 174 /* The number of points `n` can be decomposed as : 175 * 176 * n = 5^1 * 3^n3 * 2^n2 177 * 178 * for n = 10, 20, 40, 80, 160 n3 = 0, n2 = [1..5] 179 * n = 30, 60, 120, 240, 480 n3 = 1, n2 = [1..5] 180 * n = 90, 180 n3 = 2, n2 = [1..2] 181 * 182 * Note that the expression `n & (n-1) == 0` is equivalent 183 * to the check that `n` is a power of 2. */ 184 185 fft_5(x, y[is], n /= 5); 186 187 for (i3 = 0; n & (n-1); i3++, is ^= 1) 188 fft_bf3(lc3_fft_twiddles_bf3[i3], y[is], y[is ^ 1], n /= 3); 189 190 for (i2 = 0; n > 1; i2++, is ^= 1) 191 fft_bf2(lc3_fft_twiddles_bf2[i2][i3], y[is], y[is ^ 1], n >>= 1); 192 193 return y[is]; 194 } 195 196 197 /* ---------------------------------------------------------------------------- 198 * MDCT processing 199 * -------------------------------------------------------------------------- */ 200 201 /** 202 * Windowing of samples before MDCT 203 * dt, sr Duration and samplerate 204 * x, y Input current and delayed samples 205 * y, d Output windowed samples, and delayed ones 206 */ 207 LC3_HOT static void mdct_window( 208 enum lc3_dt dt, enum lc3_srate sr, 209 const float *x, float *d, float *y) 210 { 211 const float *win = lc3_mdct_win[dt][sr]; 212 int ns = lc3_ns(dt, sr), nd = lc3_nd(dt, sr); 213 214 const float *w0 = win, *w1 = w0 + ns; 215 const float *w2 = w1, *w3 = w2 + nd; 216 217 const float *x0 = x + ns-nd, *x1 = x0; 218 float *y0 = y + ns/2, *y1 = y0; 219 float *d0 = d, *d1 = d + nd; 220 221 while (x1 > x) { 222 *(--y0) = *d0 * *(w0++) - *(--x1) * *(--w1); 223 *(y1++) = (*(d0++) = *(x0++)) * *(w2++); 224 225 *(--y0) = *d0 * *(w0++) - *(--x1) * *(--w1); 226 *(y1++) = (*(d0++) = *(x0++)) * *(w2++); 227 } 228 229 for (x1 += ns; x0 < x1; ) { 230 *(--y0) = *d0 * *(w0++) - *(--d1) * *(--w1); 231 *(y1++) = (*(d0++) = *(x0++)) * *(w2++) + (*d1 = *(--x1)) * *(--w3); 232 233 *(--y0) = *d0 * *(w0++) - *(--d1) * *(--w1); 234 *(y1++) = (*(d0++) = *(x0++)) * *(w2++) + (*d1 = *(--x1)) * *(--w3); 235 } 236 } 237 238 /** 239 * Pre-rotate MDCT coefficients of N/2 points, before FFT N/4 points FFT 240 * def Size and twiddles factors 241 * x, y Input and output coefficients 242 * 243 * `x` and y` can be the same buffer 244 */ 245 LC3_HOT static void mdct_pre_fft(const struct lc3_mdct_rot_def *def, 246 const float *x, struct lc3_complex *y) 247 { 248 int n4 = def->n4; 249 250 const float *x0 = x, *x1 = x0 + 2*n4; 251 const struct lc3_complex *w0 = def->w, *w1 = w0 + n4; 252 struct lc3_complex *y0 = y, *y1 = y0 + n4; 253 254 while (x0 < x1) { 255 struct lc3_complex u, uw = *(w0++); 256 u.re = - *(--x1) * uw.re + *x0 * uw.im; 257 u.im = *(x0++) * uw.re + *x1 * uw.im; 258 259 struct lc3_complex v, vw = *(--w1); 260 v.re = - *(--x1) * vw.im + *x0 * vw.re; 261 v.im = - *(x0++) * vw.im - *x1 * vw.re; 262 263 *(y0++) = u; 264 *(--y1) = v; 265 } 266 } 267 268 /** 269 * Post-rotate FFT N/4 points coefficients, resulting MDCT N points 270 * def Size and twiddles factors 271 * x, y Input and output coefficients 272 * 273 * `x` and y` can be the same buffer 274 */ 275 LC3_HOT static void mdct_post_fft(const struct lc3_mdct_rot_def *def, 276 const struct lc3_complex *x, float *y) 277 { 278 int n4 = def->n4, n8 = n4 >> 1; 279 280 const struct lc3_complex *w0 = def->w + n8, *w1 = w0 - 1; 281 const struct lc3_complex *x0 = x + n8, *x1 = x0 - 1; 282 283 float *y0 = y + n4, *y1 = y0; 284 285 for ( ; y1 > y; x0++, x1--, w0++, w1--) { 286 287 float u0 = x0->im * w0->im + x0->re * w0->re; 288 float u1 = x1->re * w1->im - x1->im * w1->re; 289 290 float v0 = x0->re * w0->im - x0->im * w0->re; 291 float v1 = x1->im * w1->im + x1->re * w1->re; 292 293 *(y0++) = u0; *(y0++) = u1; 294 *(--y1) = v0; *(--y1) = v1; 295 } 296 } 297 298 /** 299 * Pre-rotate IMDCT coefficients of N points, before FFT N/4 points FFT 300 * def Size and twiddles factors 301 * x, y Input and output coefficients 302 * 303 * `x` and `y` can be the same buffer 304 * The real and imaginary parts of `y` are swapped, 305 * to operate on FFT instead of IFFT 306 */ 307 LC3_HOT static void imdct_pre_fft(const struct lc3_mdct_rot_def *def, 308 const float *x, struct lc3_complex *y) 309 { 310 int n4 = def->n4; 311 312 const float *x0 = x, *x1 = x0 + 2*n4; 313 314 const struct lc3_complex *w0 = def->w, *w1 = w0 + n4; 315 struct lc3_complex *y0 = y, *y1 = y0 + n4; 316 317 while (x0 < x1) { 318 float u0 = *(x0++), u1 = *(--x1); 319 float v0 = *(x0++), v1 = *(--x1); 320 struct lc3_complex uw = *(w0++), vw = *(--w1); 321 322 (y0 )->re = - u0 * uw.re - u1 * uw.im; 323 (y0++)->im = - u1 * uw.re + u0 * uw.im; 324 325 (--y1)->re = - v1 * vw.re - v0 * vw.im; 326 ( y1)->im = - v0 * vw.re + v1 * vw.im; 327 } 328 } 329 330 /** 331 * Post-rotate FFT N/4 points coefficients, resulting IMDCT N points 332 * def Size and twiddles factors 333 * x, y Input and output coefficients 334 * 335 * `x` and y` can be the same buffer 336 * The real and imaginary parts of `x` are swapped, 337 * to operate on FFT instead of IFFT 338 */ 339 LC3_HOT static void imdct_post_fft(const struct lc3_mdct_rot_def *def, 340 const struct lc3_complex *x, float *y) 341 { 342 int n4 = def->n4; 343 344 const struct lc3_complex *w0 = def->w, *w1 = w0 + n4; 345 const struct lc3_complex *x0 = x, *x1 = x0 + n4; 346 347 float *y0 = y, *y1 = y0 + 2*n4; 348 349 while (x0 < x1) { 350 struct lc3_complex uz = *(x0++), vz = *(--x1); 351 struct lc3_complex uw = *(w0++), vw = *(--w1); 352 353 *(y0++) = uz.re * uw.im - uz.im * uw.re; 354 *(--y1) = uz.re * uw.re + uz.im * uw.im; 355 356 *(--y1) = vz.re * vw.im - vz.im * vw.re; 357 *(y0++) = vz.re * vw.re + vz.im * vw.im; 358 } 359 } 360 361 /** 362 * Apply windowing of samples 363 * dt, sr Duration and samplerate 364 * x, d Middle half of IMDCT coefficients and delayed samples 365 * y, d Output samples and delayed ones 366 */ 367 LC3_HOT static void imdct_window( 368 enum lc3_dt dt, enum lc3_srate sr, 369 const float *x, float *d, float *y) 370 { 371 /* The full MDCT coefficients is given by symmetry : 372 * T[ 0 .. n/4-1] = -half[n/4-1 .. 0 ] 373 * T[ n/4 .. n/2-1] = half[0 .. n/4-1] 374 * T[ n/2 .. 3n/4-1] = half[n/4 .. n/2-1] 375 * T[3n/4 .. n-1] = half[n/2-1 .. n/4 ] */ 376 377 const float *win = lc3_mdct_win[dt][sr]; 378 int n4 = lc3_ns(dt, sr) >> 1, nd = lc3_nd(dt, sr); 379 const float *w2 = win, *w0 = w2 + 3*n4, *w1 = w0; 380 381 const float *x0 = d + nd-n4, *x1 = x0; 382 float *y0 = y + nd-n4, *y1 = y0, *y2 = d + nd, *y3 = d; 383 384 while (y0 > y) { 385 *(--y0) = *(--x0) - *(x ) * *(w1++); 386 *(y1++) = *(x1++) + *(x++) * *(--w0); 387 388 *(--y0) = *(--x0) - *(x ) * *(w1++); 389 *(y1++) = *(x1++) + *(x++) * *(--w0); 390 } 391 392 while (y1 < y + nd) { 393 *(y1++) = *(x1++) + *(x++) * *(--w0); 394 *(y1++) = *(x1++) + *(x++) * *(--w0); 395 } 396 397 while (y1 < y + 2*n4) { 398 *(y1++) = *(x ) * *(--w0); 399 *(--y2) = *(x++) * *(w2++); 400 401 *(y1++) = *(x ) * *(--w0); 402 *(--y2) = *(x++) * *(w2++); 403 } 404 405 while (y2 > y3) { 406 *(y3++) = *(x ) * *(--w0); 407 *(--y2) = *(x++) * *(w2++); 408 409 *(y3++) = *(x ) * *(--w0); 410 *(--y2) = *(x++) * *(w2++); 411 } 412 } 413 414 /** 415 * Rescale samples 416 * x, n Input and count of samples, scaled as output 417 * scale Scale factor 418 */ 419 LC3_HOT static void rescale(float *x, int n, float f) 420 { 421 for (int i = 0; i < (n >> 2); i++) { 422 *(x++) *= f; *(x++) *= f; 423 *(x++) *= f; *(x++) *= f; 424 } 425 } 426 427 /** 428 * Forward MDCT transformation 429 */ 430 void lc3_mdct_forward( 431 enum lc3_dt dt, enum lc3_srate sr, enum lc3_srate sr_dst, 432 const float *x, float *d, float *y) 433 { 434 const struct lc3_mdct_rot_def *rot = lc3_mdct_rot[dt][sr]; 435 int ns_dst = lc3_ns(dt, sr_dst); 436 int ns = lc3_ns(dt, sr); 437 438 struct lc3_complex buffer[LC3_MAX_NS / 2]; 439 struct lc3_complex *z = (struct lc3_complex *)y; 440 union { float *f; struct lc3_complex *z; } u = { .z = buffer }; 441 442 mdct_window(dt, sr, x, d, u.f); 443 444 mdct_pre_fft(rot, u.f, u.z); 445 u.z = fft(u.z, ns/2, u.z, z); 446 mdct_post_fft(rot, u.z, y); 447 448 if (ns != ns_dst) 449 rescale(y, ns_dst, sqrtf((float)ns_dst / ns)); 450 } 451 452 /** 453 * Inverse MDCT transformation 454 */ 455 void lc3_mdct_inverse( 456 enum lc3_dt dt, enum lc3_srate sr, enum lc3_srate sr_src, 457 const float *x, float *d, float *y) 458 { 459 const struct lc3_mdct_rot_def *rot = lc3_mdct_rot[dt][sr]; 460 int ns_src = lc3_ns(dt, sr_src); 461 int ns = lc3_ns(dt, sr); 462 463 struct lc3_complex buffer[LC3_MAX_NS / 2]; 464 struct lc3_complex *z = (struct lc3_complex *)y; 465 union { float *f; struct lc3_complex *z; } u = { .z = buffer }; 466 467 imdct_pre_fft(rot, x, z); 468 z = fft(z, ns/2, z, u.z); 469 imdct_post_fft(rot, z, u.f); 470 471 if (ns != ns_src) 472 rescale(u.f, ns, sqrtf((float)ns / ns_src)); 473 474 imdct_window(dt, sr, u.f, d, y); 475 } 476