1 /****************************************************************************** 2 * 3 * Copyright 2022 Google LLC 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at: 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 * 17 ******************************************************************************/ 18 19 #include "tables.h" 20 21 #include "mdct_neon.h" 22 23 24 /* ---------------------------------------------------------------------------- 25 * FFT processing 26 * -------------------------------------------------------------------------- */ 27 28 /** 29 * FFT 5 Points 30 * x, y Input and output coefficients, of size 5xn 31 * n Number of interleaved transform to perform (n % 2 = 0) 32 */ 33 #ifndef fft_5 34 LC3_HOT static inline void fft_5( 35 const struct lc3_complex *x, struct lc3_complex *y, int n) 36 { 37 static const float cos1 = 0.3090169944; /* cos(-2Pi 1/5) */ 38 static const float cos2 = -0.8090169944; /* cos(-2Pi 2/5) */ 39 40 static const float sin1 = -0.9510565163; /* sin(-2Pi 1/5) */ 41 static const float sin2 = -0.5877852523; /* sin(-2Pi 2/5) */ 42 43 for (int i = 0; i < n; i++, x++, y+= 5) { 44 45 struct lc3_complex s14 = 46 { x[1*n].re + x[4*n].re, x[1*n].im + x[4*n].im }; 47 struct lc3_complex d14 = 48 { x[1*n].re - x[4*n].re, x[1*n].im - x[4*n].im }; 49 50 struct lc3_complex s23 = 51 { x[2*n].re + x[3*n].re, x[2*n].im + x[3*n].im }; 52 struct lc3_complex d23 = 53 { x[2*n].re - x[3*n].re, x[2*n].im - x[3*n].im }; 54 55 y[0].re = x[0].re + s14.re + s23.re; 56 57 y[0].im = x[0].im + s14.im + s23.im; 58 59 y[1].re = x[0].re + s14.re * cos1 - d14.im * sin1 60 + s23.re * cos2 - d23.im * sin2; 61 62 y[1].im = x[0].im + s14.im * cos1 + d14.re * sin1 63 + s23.im * cos2 + d23.re * sin2; 64 65 y[2].re = x[0].re + s14.re * cos2 - d14.im * sin2 66 + s23.re * cos1 + d23.im * sin1; 67 68 y[2].im = x[0].im + s14.im * cos2 + d14.re * sin2 69 + s23.im * cos1 - d23.re * sin1; 70 71 y[3].re = x[0].re + s14.re * cos2 + d14.im * sin2 72 + s23.re * cos1 - d23.im * sin1; 73 74 y[3].im = x[0].im + s14.im * cos2 - d14.re * sin2 75 + s23.im * cos1 + d23.re * sin1; 76 77 y[4].re = x[0].re + s14.re * cos1 + d14.im * sin1 78 + s23.re * cos2 + d23.im * sin2; 79 80 y[4].im = x[0].im + s14.im * cos1 - d14.re * sin1 81 + s23.im * cos2 - d23.re * sin2; 82 } 83 } 84 #endif /* fft_5 */ 85 86 /** 87 * FFT Butterfly 3 Points 88 * x, y Input and output coefficients 89 * twiddles Twiddles factors, determine size of transform 90 * n Number of interleaved transforms 91 */ 92 #ifndef fft_bf3 93 LC3_HOT static inline void fft_bf3( 94 const struct lc3_fft_bf3_twiddles *twiddles, 95 const struct lc3_complex *x, struct lc3_complex *y, int n) 96 { 97 int n3 = twiddles->n3; 98 const struct lc3_complex (*w0)[2] = twiddles->t; 99 const struct lc3_complex (*w1)[2] = w0 + n3, (*w2)[2] = w1 + n3; 100 101 const struct lc3_complex *x0 = x, *x1 = x0 + n*n3, *x2 = x1 + n*n3; 102 struct lc3_complex *y0 = y, *y1 = y0 + n3, *y2 = y1 + n3; 103 104 for (int i = 0; i < n; i++, y0 += 3*n3, y1 += 3*n3, y2 += 3*n3) 105 for (int j = 0; j < n3; j++, x0++, x1++, x2++) { 106 107 y0[j].re = x0->re + x1->re * w0[j][0].re - x1->im * w0[j][0].im 108 + x2->re * w0[j][1].re - x2->im * w0[j][1].im; 109 110 y0[j].im = x0->im + x1->im * w0[j][0].re + x1->re * w0[j][0].im 111 + x2->im * w0[j][1].re + x2->re * w0[j][1].im; 112 113 y1[j].re = x0->re + x1->re * w1[j][0].re - x1->im * w1[j][0].im 114 + x2->re * w1[j][1].re - x2->im * w1[j][1].im; 115 116 y1[j].im = x0->im + x1->im * w1[j][0].re + x1->re * w1[j][0].im 117 + x2->im * w1[j][1].re + x2->re * w1[j][1].im; 118 119 y2[j].re = x0->re + x1->re * w2[j][0].re - x1->im * w2[j][0].im 120 + x2->re * w2[j][1].re - x2->im * w2[j][1].im; 121 122 y2[j].im = x0->im + x1->im * w2[j][0].re + x1->re * w2[j][0].im 123 + x2->im * w2[j][1].re + x2->re * w2[j][1].im; 124 } 125 } 126 #endif /* fft_bf3 */ 127 128 /** 129 * FFT Butterfly 2 Points 130 * twiddles Twiddles factors, determine size of transform 131 * x, y Input and output coefficients 132 * n Number of interleaved transforms 133 */ 134 #ifndef fft_bf2 135 LC3_HOT static inline void fft_bf2( 136 const struct lc3_fft_bf2_twiddles *twiddles, 137 const struct lc3_complex *x, struct lc3_complex *y, int n) 138 { 139 int n2 = twiddles->n2; 140 const struct lc3_complex *w = twiddles->t; 141 142 const struct lc3_complex *x0 = x, *x1 = x0 + n*n2; 143 struct lc3_complex *y0 = y, *y1 = y0 + n2; 144 145 for (int i = 0; i < n; i++, y0 += 2*n2, y1 += 2*n2) { 146 147 for (int j = 0; j < n2; j++, x0++, x1++) { 148 149 y0[j].re = x0->re + x1->re * w[j].re - x1->im * w[j].im; 150 y0[j].im = x0->im + x1->im * w[j].re + x1->re * w[j].im; 151 152 y1[j].re = x0->re - x1->re * w[j].re + x1->im * w[j].im; 153 y1[j].im = x0->im - x1->im * w[j].re - x1->re * w[j].im; 154 } 155 } 156 } 157 #endif /* fft_bf2 */ 158 159 /** 160 * Perform FFT 161 * x, y0, y1 Input, and 2 scratch buffers of size `n` 162 * n Number of points 30, 40, 60, 80, 90, 120, 160, 180, 240 163 * return The buffer `y0` or `y1` that hold the result 164 * 165 * Input `x` can be the same as the `y0` second scratch buffer 166 */ 167 static struct lc3_complex *fft(const struct lc3_complex *x, int n, 168 struct lc3_complex *y0, struct lc3_complex *y1) 169 { 170 struct lc3_complex *y[2] = { y1, y0 }; 171 int i2, i3, is = 0; 172 173 /* The number of points `n` can be decomposed as : 174 * 175 * n = 5^1 * 3^n3 * 2^n2 176 * 177 * for n = 40, 80, 160 n3 = 0, n2 = [3..5] 178 * n = 30, 60, 120, 240 n3 = 1, n2 = [1..4] 179 * n = 90, 180 n3 = 2, n2 = [1..2] 180 * 181 * Note that the expression `n & (n-1) == 0` is equivalent 182 * to the check that `n` is a power of 2. */ 183 184 fft_5(x, y[is], n /= 5); 185 186 for (i3 = 0; n & (n-1); i3++, is ^= 1) 187 fft_bf3(lc3_fft_twiddles_bf3[i3], y[is], y[is ^ 1], n /= 3); 188 189 for (i2 = 0; n > 1; i2++, is ^= 1) 190 fft_bf2(lc3_fft_twiddles_bf2[i2][i3], y[is], y[is ^ 1], n >>= 1); 191 192 return y[is]; 193 } 194 195 196 /* ---------------------------------------------------------------------------- 197 * MDCT processing 198 * -------------------------------------------------------------------------- */ 199 200 /** 201 * Windowing of samples before MDCT 202 * dt, sr Duration and samplerate (size of the transform) 203 * x, y Input current and delayed samples 204 * y, d Output windowed samples, and delayed ones 205 */ 206 LC3_HOT static void mdct_window(enum lc3_dt dt, enum lc3_srate sr, 207 const float *x, float *d, float *y) 208 { 209 int ns = LC3_NS(dt, sr), nd = LC3_ND(dt, sr); 210 211 const float *w0 = lc3_mdct_win[dt][sr], *w1 = w0 + ns; 212 const float *w2 = w1, *w3 = w2 + nd; 213 214 const float *x0 = x + ns-nd, *x1 = x0; 215 float *y0 = y + ns/2, *y1 = y0; 216 float *d0 = d, *d1 = d + nd; 217 218 while (x1 > x) { 219 *(--y0) = *d0 * *(w0++) - *(--x1) * *(--w1); 220 *(y1++) = (*(d0++) = *(x0++)) * *(w2++); 221 222 *(--y0) = *d0 * *(w0++) - *(--x1) * *(--w1); 223 *(y1++) = (*(d0++) = *(x0++)) * *(w2++); 224 } 225 226 for (x1 += ns; x0 < x1; ) { 227 *(--y0) = *d0 * *(w0++) - *(--d1) * *(--w1); 228 *(y1++) = (*(d0++) = *(x0++)) * *(w2++) + (*d1 = *(--x1)) * *(--w3); 229 230 *(--y0) = *d0 * *(w0++) - *(--d1) * *(--w1); 231 *(y1++) = (*(d0++) = *(x0++)) * *(w2++) + (*d1 = *(--x1)) * *(--w3); 232 } 233 } 234 235 /** 236 * Pre-rotate MDCT coefficients of N/2 points, before FFT N/4 points FFT 237 * def Size and twiddles factors 238 * x, y Input and output coefficients 239 * 240 * `x` and y` can be the same buffer 241 */ 242 LC3_HOT static void mdct_pre_fft(const struct lc3_mdct_rot_def *def, 243 const float *x, struct lc3_complex *y) 244 { 245 int n4 = def->n4; 246 247 const float *x0 = x, *x1 = x0 + 2*n4; 248 const struct lc3_complex *w0 = def->w, *w1 = w0 + n4; 249 struct lc3_complex *y0 = y, *y1 = y0 + n4; 250 251 while (x0 < x1) { 252 struct lc3_complex u, uw = *(w0++); 253 u.re = - *(--x1) * uw.re + *x0 * uw.im; 254 u.im = *(x0++) * uw.re + *x1 * uw.im; 255 256 struct lc3_complex v, vw = *(--w1); 257 v.re = - *(--x1) * vw.im + *x0 * vw.re; 258 v.im = - *(x0++) * vw.im - *x1 * vw.re; 259 260 *(y0++) = u; 261 *(--y1) = v; 262 } 263 } 264 265 /** 266 * Post-rotate FFT N/4 points coefficients, resulting MDCT N points 267 * def Size and twiddles factors 268 * x, y Input and output coefficients 269 * scale Scale on output coefficients 270 * 271 * `x` and y` can be the same buffer 272 */ 273 LC3_HOT static void mdct_post_fft(const struct lc3_mdct_rot_def *def, 274 const struct lc3_complex *x, float *y, float scale) 275 { 276 int n4 = def->n4, n8 = n4 >> 1; 277 278 const struct lc3_complex *w0 = def->w + n8, *w1 = w0 - 1; 279 const struct lc3_complex *x0 = x + n8, *x1 = x0 - 1; 280 281 float *y0 = y + n4, *y1 = y0; 282 283 for ( ; y1 > y; x0++, x1--, w0++, w1--) { 284 285 float u0 = (x0->im * w0->im + x0->re * w0->re) * scale; 286 float u1 = (x1->re * w1->im - x1->im * w1->re) * scale; 287 288 float v0 = (x0->re * w0->im - x0->im * w0->re) * scale; 289 float v1 = (x1->im * w1->im + x1->re * w1->re) * scale; 290 291 *(y0++) = u0; *(y0++) = u1; 292 *(--y1) = v0; *(--y1) = v1; 293 } 294 } 295 296 /** 297 * Pre-rotate IMDCT coefficients of N points, before FFT N/4 points FFT 298 * def Size and twiddles factors 299 * x, y Input and output coefficients 300 * 301 * `x` and `y` can be the same buffer 302 * The real and imaginary parts of `y` are swapped, 303 * to operate on FFT instead of IFFT 304 */ 305 LC3_HOT static void imdct_pre_fft(const struct lc3_mdct_rot_def *def, 306 const float *x, struct lc3_complex *y) 307 { 308 int n4 = def->n4; 309 310 const float *x0 = x, *x1 = x0 + 2*n4; 311 312 const struct lc3_complex *w0 = def->w, *w1 = w0 + n4; 313 struct lc3_complex *y0 = y, *y1 = y0 + n4; 314 315 while (x0 < x1) { 316 float u0 = *(x0++), u1 = *(--x1); 317 float v0 = *(x0++), v1 = *(--x1); 318 struct lc3_complex uw = *(w0++), vw = *(--w1); 319 320 (y0 )->re = - u0 * uw.re - u1 * uw.im; 321 (y0++)->im = - u1 * uw.re + u0 * uw.im; 322 323 (--y1)->re = - v1 * vw.re - v0 * vw.im; 324 ( y1)->im = - v0 * vw.re + v1 * vw.im; 325 } 326 } 327 328 /** 329 * Post-rotate FFT N/4 points coefficients, resulting IMDCT N points 330 * def Size and twiddles factors 331 * x, y Input and output coefficients 332 * scale Scale on output coefficients 333 * 334 * `x` and y` can be the same buffer 335 * The real and imaginary parts of `x` are swapped, 336 * to operate on FFT instead of IFFT 337 */ 338 LC3_HOT static void imdct_post_fft(const struct lc3_mdct_rot_def *def, 339 const struct lc3_complex *x, float *y, float scale) 340 { 341 int n4 = def->n4; 342 343 const struct lc3_complex *w0 = def->w, *w1 = w0 + n4; 344 const struct lc3_complex *x0 = x, *x1 = x0 + n4; 345 346 float *y0 = y, *y1 = y0 + 2*n4; 347 348 while (x0 < x1) { 349 struct lc3_complex uz = *(x0++), vz = *(--x1); 350 struct lc3_complex uw = *(w0++), vw = *(--w1); 351 352 *(y0++) = (uz.re * uw.im - uz.im * uw.re) * scale; 353 *(--y1) = (uz.re * uw.re + uz.im * uw.im) * scale; 354 355 *(--y1) = (vz.re * vw.im - vz.im * vw.re) * scale; 356 *(y0++) = (vz.re * vw.re + vz.im * vw.im) * scale; 357 } 358 } 359 360 /** 361 * Apply windowing of samples 362 * dt, sr Duration and samplerate 363 * x, d Middle half of IMDCT coefficients and delayed samples 364 * y, d Output samples and delayed ones 365 */ 366 LC3_HOT static void imdct_window(enum lc3_dt dt, enum lc3_srate sr, 367 const float *x, float *d, float *y) 368 { 369 /* The full MDCT coefficients is given by symmetry : 370 * T[ 0 .. n/4-1] = -half[n/4-1 .. 0 ] 371 * T[ n/4 .. n/2-1] = half[0 .. n/4-1] 372 * T[ n/2 .. 3n/4-1] = half[n/4 .. n/2-1] 373 * T[3n/4 .. n-1] = half[n/2-1 .. n/4 ] */ 374 375 int n4 = LC3_NS(dt, sr) >> 1, nd = LC3_ND(dt, sr); 376 const float *w2 = lc3_mdct_win[dt][sr], *w0 = w2 + 3*n4, *w1 = w0; 377 378 const float *x0 = d + nd-n4, *x1 = x0; 379 float *y0 = y + nd-n4, *y1 = y0, *y2 = d + nd, *y3 = d; 380 381 while (y0 > y) { 382 *(--y0) = *(--x0) - *(x ) * *(w1++); 383 *(y1++) = *(x1++) + *(x++) * *(--w0); 384 385 *(--y0) = *(--x0) - *(x ) * *(w1++); 386 *(y1++) = *(x1++) + *(x++) * *(--w0); 387 } 388 389 while (y1 < y + nd) { 390 *(y1++) = *(x1++) + *(x++) * *(--w0); 391 *(y1++) = *(x1++) + *(x++) * *(--w0); 392 } 393 394 while (y1 < y + 2*n4) { 395 *(y1++) = *(x ) * *(--w0); 396 *(--y2) = *(x++) * *(w2++); 397 398 *(y1++) = *(x ) * *(--w0); 399 *(--y2) = *(x++) * *(w2++); 400 } 401 402 while (y2 > y3) { 403 *(y3++) = *(x ) * *(--w0); 404 *(--y2) = *(x++) * *(w2++); 405 406 *(y3++) = *(x ) * *(--w0); 407 *(--y2) = *(x++) * *(w2++); 408 } 409 } 410 411 /** 412 * Forward MDCT transformation 413 */ 414 void lc3_mdct_forward(enum lc3_dt dt, enum lc3_srate sr, 415 enum lc3_srate sr_dst, const float *x, float *d, float *y) 416 { 417 const struct lc3_mdct_rot_def *rot = lc3_mdct_rot[dt][sr]; 418 int nf = LC3_NS(dt, sr_dst); 419 int ns = LC3_NS(dt, sr); 420 421 struct lc3_complex buffer[ns/2]; 422 struct lc3_complex *z = (struct lc3_complex *)y; 423 union { float *f; struct lc3_complex *z; } u = { .z = buffer }; 424 425 mdct_window(dt, sr, x, d, u.f); 426 427 mdct_pre_fft(rot, u.f, u.z); 428 u.z = fft(u.z, ns/2, u.z, z); 429 mdct_post_fft(rot, u.z, y, sqrtf( (2.f*nf) / (ns*ns) )); 430 } 431 432 /** 433 * Inverse MDCT transformation 434 */ 435 void lc3_mdct_inverse(enum lc3_dt dt, enum lc3_srate sr, 436 enum lc3_srate sr_src, const float *x, float *d, float *y) 437 { 438 const struct lc3_mdct_rot_def *rot = lc3_mdct_rot[dt][sr]; 439 int nf = LC3_NS(dt, sr_src); 440 int ns = LC3_NS(dt, sr); 441 442 struct lc3_complex buffer[ns/2]; 443 struct lc3_complex *z = (struct lc3_complex *)y; 444 union { float *f; struct lc3_complex *z; } u = { .z = buffer }; 445 446 imdct_pre_fft(rot, x, z); 447 z = fft(z, ns/2, z, u.z); 448 imdct_post_fft(rot, z, u.f, sqrtf(2.f / nf)); 449 450 imdct_window(dt, sr, u.f, d, y); 451 } 452