1 /****************************************************************************** 2 * 3 * Copyright 2022 Google LLC 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at: 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 * 17 ******************************************************************************/ 18 19 #ifndef __LC3_FASTMATH_H 20 #define __LC3_FASTMATH_H 21 22 #include <stdint.h> 23 #include <float.h> 24 #include <math.h> 25 26 27 /** 28 * IEEE 754 Floating point representation 29 */ 30 31 #define LC3_IEEE754_SIGN_SHL (31) 32 #define LC3_IEEE754_SIGN_MASK (1 << LC3_IEEE754_SIGN_SHL) 33 34 #define LC3_IEEE754_EXP_SHL (23) 35 #define LC3_IEEE754_EXP_MASK (0xff << LC3_IEEE754_EXP_SHL) 36 #define LC3_IEEE754_EXP_BIAS (127) 37 38 39 /** 40 * Fast multiply floating-point number by integral power of 2 41 * x Operand, finite number 42 * exp Exponent such that 2^x is finite 43 * return 2^exp 44 */ 45 static inline float lc3_ldexpf(float _x, int exp) { 46 union { float f; int32_t s; } x = { .f = _x }; 47 48 if (x.s & LC3_IEEE754_EXP_MASK) 49 x.s += exp << LC3_IEEE754_EXP_SHL; 50 51 return x.f; 52 } 53 54 /** 55 * Fast convert floating-point number to fractional and integral components 56 * x Operand, finite number 57 * exp Return the exponent part 58 * return The normalized fraction in [0.5:1[ 59 */ 60 static inline float lc3_frexpf(float _x, int *exp) { 61 union { float f; uint32_t u; } x = { .f = _x }; 62 63 int e = (x.u & LC3_IEEE754_EXP_MASK) >> LC3_IEEE754_EXP_SHL; 64 *exp = e - (LC3_IEEE754_EXP_BIAS - 1); 65 66 x.u = (x.u & ~LC3_IEEE754_EXP_MASK) | 67 ((LC3_IEEE754_EXP_BIAS - 1) << LC3_IEEE754_EXP_SHL); 68 69 return x.f; 70 } 71 72 /** 73 * Fast 2^n approximation 74 * x Operand, range -100 to 100 75 * return 2^x approximation (max relative error ~ 4e-7) 76 */ 77 static inline float lc3_exp2f(float x) 78 { 79 /* --- 2^(i/8) for i from 0 to 7 --- */ 80 81 static const float e[] = { 82 1.00000000e+00, 1.09050773e+00, 1.18920712e+00, 1.29683955e+00, 83 1.41421356e+00, 1.54221083e+00, 1.68179283e+00, 1.83400809e+00 }; 84 85 /* --- Polynomial approx in range 0 to 1/8 --- */ 86 87 static const float p[] = { 88 1.00448128e-02, 5.54563260e-02, 2.40228756e-01, 6.93147140e-01 }; 89 90 /* --- Split the operand --- 91 * 92 * Such as x = k/8 + y, with k an integer, and |y| < 0.5/8 93 * 94 * Note that `fast-math` compiler option leads to rounding error, 95 * disable optimisation with `volatile`. */ 96 97 volatile union { float f; int32_t s; } v; 98 99 v.f = x + 0x1.8p20f; 100 int k = v.s; 101 x -= v.f - 0x1.8p20f; 102 103 /* --- Compute 2^x, with |x| < 1 --- 104 * Perform polynomial approximation in range -0.5/8 to 0.5/8, 105 * and muplity by precomputed value of 2^(i/8), i in [0:7] */ 106 107 union { float f; int32_t s; } y; 108 109 y.f = ( p[0]) * x; 110 y.f = (y.f + p[1]) * x; 111 y.f = (y.f + p[2]) * x; 112 y.f = (y.f + p[3]) * x; 113 y.f = (y.f + 1.f) * e[k & 7]; 114 115 /* --- Add the exponent --- */ 116 117 y.s += (k >> 3) << LC3_IEEE754_EXP_SHL; 118 119 return y.f; 120 } 121 122 /** 123 * Fast log2(x) approximation 124 * x Operand, greater than 0 125 * return log2(x) approximation (max absolute error ~ 1e-4) 126 */ 127 static inline float lc3_log2f(float x) 128 { 129 float y; 130 int e; 131 132 /* --- Polynomial approx in range 0.5 to 1 --- */ 133 134 static const float c[] = { 135 -1.29479677, 5.11769018, -8.42295281, 8.10557963, -3.50567360 }; 136 137 x = lc3_frexpf(x, &e); 138 139 y = ( c[0]) * x; 140 y = (y + c[1]) * x; 141 y = (y + c[2]) * x; 142 y = (y + c[3]) * x; 143 y = (y + c[4]); 144 145 /* --- Add log2f(2^e) and return --- */ 146 147 return e + y; 148 } 149 150 /** 151 * Fast log10(x) approximation 152 * x Operand, greater than 0 153 * return log10(x) approximation (max absolute error ~ 1e-4) 154 */ 155 static inline float lc3_log10f(float x) 156 { 157 return log10f(2) * lc3_log2f(x); 158 } 159 160 /** 161 * Fast `10 * log10(x)` (or dB) approximation in fixed Q16 162 * x Operand, in range 2^-63 to 2^63 (1e-19 to 1e19) 163 * return 10 * log10(x) in fixed Q16 (-190 to 192 dB) 164 * 165 * - The 0 value is accepted and return the minimum value ~ -191dB 166 * - This function assumed that float 32 bits is coded IEEE 754 167 */ 168 static inline int32_t lc3_db_q16(float x) 169 { 170 /* --- Table in Q15 --- */ 171 172 static const uint16_t t[][2] = { 173 174 /* [n][0] = 10 * log10(2) * log2(1 + n/32), with n = [0..15] */ 175 /* [n][1] = [n+1][0] - [n][0] (while defining [16][0]) */ 176 177 { 0, 4379 }, { 4379, 4248 }, { 8627, 4125 }, { 12753, 4009 }, 178 { 16762, 3899 }, { 20661, 3795 }, { 24456, 3697 }, { 28153, 3603 }, 179 { 31755, 3514 }, { 35269, 3429 }, { 38699, 3349 }, { 42047, 3272 }, 180 { 45319, 3198 }, { 48517, 3128 }, { 51645, 3061 }, { 54705, 2996 }, 181 182 /* [n][0] = 10 * log10(2) * log2(1 + n/32) - 10 * log10(2) / 2, */ 183 /* with n = [16..31] */ 184 /* [n][1] = [n+1][0] - [n][0] (while defining [32][0]) */ 185 186 { 8381, 2934 }, { 11315, 2875 }, { 14190, 2818 }, { 17008, 2763 }, 187 { 19772, 2711 }, { 22482, 2660 }, { 25142, 2611 }, { 27754, 2564 }, 188 { 30318, 2519 }, { 32837, 2475 }, { 35312, 2433 }, { 37744, 2392 }, 189 { 40136, 2352 }, { 42489, 2314 }, { 44803, 2277 }, { 47080, 2241 }, 190 191 }; 192 193 /* --- Approximation --- 194 * 195 * 10 * log10(x^2) = 10 * log10(2) * log2(x^2) 196 * 197 * And log2(x^2) = 2 * log2( (1 + m) * 2^e ) 198 * = 2 * (e + log2(1 + m)) , with m in range [0..1] 199 * 200 * Split the float values in : 201 * e2 Double value of the exponent (2 * e + k) 202 * hi High 5 bits of mantissa, for precalculated result `t[hi][0]` 203 * lo Low 16 bits of mantissa, for linear interpolation `t[hi][1]` 204 * 205 * Two cases, from the range of the mantissa : 206 * 0 to 0.5 `k = 0`, use 1st part of the table 207 * 0.5 to 1 `k = 1`, use 2nd part of the table */ 208 209 union { float f; uint32_t u; } x2 = { .f = x*x }; 210 211 int e2 = (int)(x2.u >> 22) - 2*127; 212 int hi = (x2.u >> 18) & 0x1f; 213 int lo = (x2.u >> 2) & 0xffff; 214 215 return e2 * 49321 + t[hi][0] + ((t[hi][1] * lo) >> 16); 216 } 217 218 219 #endif /* __LC3_FASTMATH_H */ 220