14930cef6SMatthias Ringwald /******************************************************************************
24930cef6SMatthias Ringwald *
34930cef6SMatthias Ringwald * Copyright 2022 Google LLC
44930cef6SMatthias Ringwald *
54930cef6SMatthias Ringwald * Licensed under the Apache License, Version 2.0 (the "License");
64930cef6SMatthias Ringwald * you may not use this file except in compliance with the License.
74930cef6SMatthias Ringwald * You may obtain a copy of the License at:
84930cef6SMatthias Ringwald *
94930cef6SMatthias Ringwald * http://www.apache.org/licenses/LICENSE-2.0
104930cef6SMatthias Ringwald *
114930cef6SMatthias Ringwald * Unless required by applicable law or agreed to in writing, software
124930cef6SMatthias Ringwald * distributed under the License is distributed on an "AS IS" BASIS,
134930cef6SMatthias Ringwald * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
144930cef6SMatthias Ringwald * See the License for the specific language governing permissions and
154930cef6SMatthias Ringwald * limitations under the License.
164930cef6SMatthias Ringwald *
174930cef6SMatthias Ringwald ******************************************************************************/
184930cef6SMatthias Ringwald
194930cef6SMatthias Ringwald #ifndef __LC3_FASTMATH_H
204930cef6SMatthias Ringwald #define __LC3_FASTMATH_H
214930cef6SMatthias Ringwald
224930cef6SMatthias Ringwald #include <stdint.h>
23*6897da5cSDirk Helbig #include <float.h>
244930cef6SMatthias Ringwald #include <math.h>
254930cef6SMatthias Ringwald
264930cef6SMatthias Ringwald
274930cef6SMatthias Ringwald /**
28*6897da5cSDirk Helbig * IEEE 754 Floating point representation
294930cef6SMatthias Ringwald */
30*6897da5cSDirk Helbig
31*6897da5cSDirk Helbig #define LC3_IEEE754_SIGN_SHL (31)
32*6897da5cSDirk Helbig #define LC3_IEEE754_SIGN_MASK (1 << LC3_IEEE754_SIGN_SHL)
33*6897da5cSDirk Helbig
34*6897da5cSDirk Helbig #define LC3_IEEE754_EXP_SHL (23)
35*6897da5cSDirk Helbig #define LC3_IEEE754_EXP_MASK (0xff << LC3_IEEE754_EXP_SHL)
36*6897da5cSDirk Helbig #define LC3_IEEE754_EXP_BIAS (127)
37*6897da5cSDirk Helbig
38*6897da5cSDirk Helbig
39*6897da5cSDirk Helbig /**
40*6897da5cSDirk Helbig * Fast multiply floating-point number by integral power of 2
41*6897da5cSDirk Helbig * x Operand, finite number
42*6897da5cSDirk Helbig * exp Exponent such that 2^x is finite
43*6897da5cSDirk Helbig * return 2^exp
44*6897da5cSDirk Helbig */
lc3_ldexpf(float _x,int exp)45*6897da5cSDirk Helbig static inline float lc3_ldexpf(float _x, int exp) {
46*6897da5cSDirk Helbig union { float f; int32_t s; } x = { .f = _x };
47*6897da5cSDirk Helbig
48*6897da5cSDirk Helbig if (x.s & LC3_IEEE754_EXP_MASK)
49*6897da5cSDirk Helbig x.s += exp << LC3_IEEE754_EXP_SHL;
50*6897da5cSDirk Helbig
51*6897da5cSDirk Helbig return x.f;
52*6897da5cSDirk Helbig }
53*6897da5cSDirk Helbig
54*6897da5cSDirk Helbig /**
55*6897da5cSDirk Helbig * Fast convert floating-point number to fractional and integral components
56*6897da5cSDirk Helbig * x Operand, finite number
57*6897da5cSDirk Helbig * exp Return the exponent part
58*6897da5cSDirk Helbig * return The normalized fraction in [0.5:1[
59*6897da5cSDirk Helbig */
lc3_frexpf(float _x,int * exp)60*6897da5cSDirk Helbig static inline float lc3_frexpf(float _x, int *exp) {
61*6897da5cSDirk Helbig union { float f; uint32_t u; } x = { .f = _x };
62*6897da5cSDirk Helbig
63*6897da5cSDirk Helbig int e = (x.u & LC3_IEEE754_EXP_MASK) >> LC3_IEEE754_EXP_SHL;
64*6897da5cSDirk Helbig *exp = e - (LC3_IEEE754_EXP_BIAS - 1);
65*6897da5cSDirk Helbig
66*6897da5cSDirk Helbig x.u = (x.u & ~LC3_IEEE754_EXP_MASK) |
67*6897da5cSDirk Helbig ((LC3_IEEE754_EXP_BIAS - 1) << LC3_IEEE754_EXP_SHL);
68*6897da5cSDirk Helbig
69*6897da5cSDirk Helbig return x.f;
70*6897da5cSDirk Helbig }
71*6897da5cSDirk Helbig
72*6897da5cSDirk Helbig /**
73*6897da5cSDirk Helbig * Fast 2^n approximation
74*6897da5cSDirk Helbig * x Operand, range -100 to 100
75*6897da5cSDirk Helbig * return 2^x approximation (max relative error ~ 4e-7)
76*6897da5cSDirk Helbig */
lc3_exp2f(float x)77*6897da5cSDirk Helbig static inline float lc3_exp2f(float x)
784930cef6SMatthias Ringwald {
79*6897da5cSDirk Helbig /* --- 2^(i/8) for i from 0 to 7 --- */
804930cef6SMatthias Ringwald
81*6897da5cSDirk Helbig static const float e[] = {
82*6897da5cSDirk Helbig 1.00000000e+00, 1.09050773e+00, 1.18920712e+00, 1.29683955e+00,
83*6897da5cSDirk Helbig 1.41421356e+00, 1.54221083e+00, 1.68179283e+00, 1.83400809e+00 };
844930cef6SMatthias Ringwald
85*6897da5cSDirk Helbig /* --- Polynomial approx in range 0 to 1/8 --- */
864930cef6SMatthias Ringwald
87*6897da5cSDirk Helbig static const float p[] = {
88*6897da5cSDirk Helbig 1.00448128e-02, 5.54563260e-02, 2.40228756e-01, 6.93147140e-01 };
894930cef6SMatthias Ringwald
90*6897da5cSDirk Helbig /* --- Split the operand ---
91*6897da5cSDirk Helbig *
92*6897da5cSDirk Helbig * Such as x = k/8 + y, with k an integer, and |y| < 0.5/8
93*6897da5cSDirk Helbig *
94*6897da5cSDirk Helbig * Note that `fast-math` compiler option leads to rounding error,
95*6897da5cSDirk Helbig * disable optimisation with `volatile`. */
964930cef6SMatthias Ringwald
97*6897da5cSDirk Helbig volatile union { float f; int32_t s; } v;
984930cef6SMatthias Ringwald
99*6897da5cSDirk Helbig v.f = x + 0x1.8p20f;
100*6897da5cSDirk Helbig int k = v.s;
101*6897da5cSDirk Helbig x -= v.f - 0x1.8p20f;
102*6897da5cSDirk Helbig
103*6897da5cSDirk Helbig /* --- Compute 2^x, with |x| < 1 ---
104*6897da5cSDirk Helbig * Perform polynomial approximation in range -0.5/8 to 0.5/8,
105*6897da5cSDirk Helbig * and muplity by precomputed value of 2^(i/8), i in [0:7] */
106*6897da5cSDirk Helbig
107*6897da5cSDirk Helbig union { float f; int32_t s; } y;
108*6897da5cSDirk Helbig
109*6897da5cSDirk Helbig y.f = ( p[0]) * x;
110*6897da5cSDirk Helbig y.f = (y.f + p[1]) * x;
111*6897da5cSDirk Helbig y.f = (y.f + p[2]) * x;
112*6897da5cSDirk Helbig y.f = (y.f + p[3]) * x;
113*6897da5cSDirk Helbig y.f = (y.f + 1.f) * e[k & 7];
114*6897da5cSDirk Helbig
115*6897da5cSDirk Helbig /* --- Add the exponent --- */
116*6897da5cSDirk Helbig
117*6897da5cSDirk Helbig y.s += (k >> 3) << LC3_IEEE754_EXP_SHL;
118*6897da5cSDirk Helbig
119*6897da5cSDirk Helbig return y.f;
1204930cef6SMatthias Ringwald }
1214930cef6SMatthias Ringwald
1224930cef6SMatthias Ringwald /**
1234930cef6SMatthias Ringwald * Fast log2(x) approximation
1244930cef6SMatthias Ringwald * x Operand, greater than 0
1254930cef6SMatthias Ringwald * return log2(x) approximation (max absolute error ~ 1e-4)
1264930cef6SMatthias Ringwald */
lc3_log2f(float x)127*6897da5cSDirk Helbig static inline float lc3_log2f(float x)
1284930cef6SMatthias Ringwald {
1294930cef6SMatthias Ringwald float y;
1304930cef6SMatthias Ringwald int e;
1314930cef6SMatthias Ringwald
1324930cef6SMatthias Ringwald /* --- Polynomial approx in range 0.5 to 1 --- */
1334930cef6SMatthias Ringwald
1344930cef6SMatthias Ringwald static const float c[] = {
1354930cef6SMatthias Ringwald -1.29479677, 5.11769018, -8.42295281, 8.10557963, -3.50567360 };
1364930cef6SMatthias Ringwald
137*6897da5cSDirk Helbig x = lc3_frexpf(x, &e);
1384930cef6SMatthias Ringwald
1394930cef6SMatthias Ringwald y = ( c[0]) * x;
1404930cef6SMatthias Ringwald y = (y + c[1]) * x;
1414930cef6SMatthias Ringwald y = (y + c[2]) * x;
1424930cef6SMatthias Ringwald y = (y + c[3]) * x;
1434930cef6SMatthias Ringwald y = (y + c[4]);
1444930cef6SMatthias Ringwald
1454930cef6SMatthias Ringwald /* --- Add log2f(2^e) and return --- */
1464930cef6SMatthias Ringwald
1474930cef6SMatthias Ringwald return e + y;
1484930cef6SMatthias Ringwald }
1494930cef6SMatthias Ringwald
1504930cef6SMatthias Ringwald /**
1514930cef6SMatthias Ringwald * Fast log10(x) approximation
1524930cef6SMatthias Ringwald * x Operand, greater than 0
1534930cef6SMatthias Ringwald * return log10(x) approximation (max absolute error ~ 1e-4)
1544930cef6SMatthias Ringwald */
lc3_log10f(float x)155*6897da5cSDirk Helbig static inline float lc3_log10f(float x)
1564930cef6SMatthias Ringwald {
157*6897da5cSDirk Helbig return log10f(2) * lc3_log2f(x);
1584930cef6SMatthias Ringwald }
1594930cef6SMatthias Ringwald
1604930cef6SMatthias Ringwald /**
1614930cef6SMatthias Ringwald * Fast `10 * log10(x)` (or dB) approximation in fixed Q16
1624930cef6SMatthias Ringwald * x Operand, in range 2^-63 to 2^63 (1e-19 to 1e19)
1634930cef6SMatthias Ringwald * return 10 * log10(x) in fixed Q16 (-190 to 192 dB)
1644930cef6SMatthias Ringwald *
1654930cef6SMatthias Ringwald * - The 0 value is accepted and return the minimum value ~ -191dB
1664930cef6SMatthias Ringwald * - This function assumed that float 32 bits is coded IEEE 754
1674930cef6SMatthias Ringwald */
lc3_db_q16(float x)168*6897da5cSDirk Helbig static inline int32_t lc3_db_q16(float x)
1694930cef6SMatthias Ringwald {
1704930cef6SMatthias Ringwald /* --- Table in Q15 --- */
1714930cef6SMatthias Ringwald
1724930cef6SMatthias Ringwald static const uint16_t t[][2] = {
1734930cef6SMatthias Ringwald
1744930cef6SMatthias Ringwald /* [n][0] = 10 * log10(2) * log2(1 + n/32), with n = [0..15] */
1754930cef6SMatthias Ringwald /* [n][1] = [n+1][0] - [n][0] (while defining [16][0]) */
1764930cef6SMatthias Ringwald
1774930cef6SMatthias Ringwald { 0, 4379 }, { 4379, 4248 }, { 8627, 4125 }, { 12753, 4009 },
1784930cef6SMatthias Ringwald { 16762, 3899 }, { 20661, 3795 }, { 24456, 3697 }, { 28153, 3603 },
1794930cef6SMatthias Ringwald { 31755, 3514 }, { 35269, 3429 }, { 38699, 3349 }, { 42047, 3272 },
1804930cef6SMatthias Ringwald { 45319, 3198 }, { 48517, 3128 }, { 51645, 3061 }, { 54705, 2996 },
1814930cef6SMatthias Ringwald
1824930cef6SMatthias Ringwald /* [n][0] = 10 * log10(2) * log2(1 + n/32) - 10 * log10(2) / 2, */
1834930cef6SMatthias Ringwald /* with n = [16..31] */
1844930cef6SMatthias Ringwald /* [n][1] = [n+1][0] - [n][0] (while defining [32][0]) */
1854930cef6SMatthias Ringwald
1864930cef6SMatthias Ringwald { 8381, 2934 }, { 11315, 2875 }, { 14190, 2818 }, { 17008, 2763 },
1874930cef6SMatthias Ringwald { 19772, 2711 }, { 22482, 2660 }, { 25142, 2611 }, { 27754, 2564 },
1884930cef6SMatthias Ringwald { 30318, 2519 }, { 32837, 2475 }, { 35312, 2433 }, { 37744, 2392 },
1894930cef6SMatthias Ringwald { 40136, 2352 }, { 42489, 2314 }, { 44803, 2277 }, { 47080, 2241 },
1904930cef6SMatthias Ringwald
1914930cef6SMatthias Ringwald };
1924930cef6SMatthias Ringwald
1934930cef6SMatthias Ringwald /* --- Approximation ---
1944930cef6SMatthias Ringwald *
1954930cef6SMatthias Ringwald * 10 * log10(x^2) = 10 * log10(2) * log2(x^2)
1964930cef6SMatthias Ringwald *
1974930cef6SMatthias Ringwald * And log2(x^2) = 2 * log2( (1 + m) * 2^e )
1984930cef6SMatthias Ringwald * = 2 * (e + log2(1 + m)) , with m in range [0..1]
1994930cef6SMatthias Ringwald *
2004930cef6SMatthias Ringwald * Split the float values in :
2014930cef6SMatthias Ringwald * e2 Double value of the exponent (2 * e + k)
2024930cef6SMatthias Ringwald * hi High 5 bits of mantissa, for precalculated result `t[hi][0]`
2034930cef6SMatthias Ringwald * lo Low 16 bits of mantissa, for linear interpolation `t[hi][1]`
2044930cef6SMatthias Ringwald *
2054930cef6SMatthias Ringwald * Two cases, from the range of the mantissa :
2064930cef6SMatthias Ringwald * 0 to 0.5 `k = 0`, use 1st part of the table
2074930cef6SMatthias Ringwald * 0.5 to 1 `k = 1`, use 2nd part of the table */
2084930cef6SMatthias Ringwald
2094930cef6SMatthias Ringwald union { float f; uint32_t u; } x2 = { .f = x*x };
2104930cef6SMatthias Ringwald
2114930cef6SMatthias Ringwald int e2 = (int)(x2.u >> 22) - 2*127;
2124930cef6SMatthias Ringwald int hi = (x2.u >> 18) & 0x1f;
2134930cef6SMatthias Ringwald int lo = (x2.u >> 2) & 0xffff;
2144930cef6SMatthias Ringwald
2154930cef6SMatthias Ringwald return e2 * 49321 + t[hi][0] + ((t[hi][1] * lo) >> 16);
2164930cef6SMatthias Ringwald }
2174930cef6SMatthias Ringwald
2184930cef6SMatthias Ringwald
2194930cef6SMatthias Ringwald #endif /* __LC3_FASTMATH_H */
220