//===-- Single-precision general exp/log functions ------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #ifndef LLVM_LIBC_SRC_MATH_GENERIC_EXPLOGXF_H #define LLVM_LIBC_SRC_MATH_GENERIC_EXPLOGXF_H #include "common_constants.h" #include "src/__support/CPP/bit.h" #include "src/__support/CPP/optional.h" #include "src/__support/FPUtil/FEnvImpl.h" #include "src/__support/FPUtil/FPBits.h" #include "src/__support/FPUtil/PolyEval.h" #include "src/__support/FPUtil/nearest_integer.h" #include "src/__support/common.h" #include "src/__support/macros/config.h" #include "src/__support/macros/properties/cpu_features.h" namespace LIBC_NAMESPACE_DECL { struct ExpBase { // Base = e static constexpr int MID_BITS = 5; static constexpr int MID_MASK = (1 << MID_BITS) - 1; // log2(e) * 2^5 static constexpr double LOG2_B = 0x1.71547652b82fep+0 * (1 << MID_BITS); // High and low parts of -log(2) * 2^(-5) static constexpr double M_LOGB_2_HI = -0x1.62e42fefa0000p-1 / (1 << MID_BITS); static constexpr double M_LOGB_2_LO = -0x1.cf79abc9e3b3ap-40 / (1 << MID_BITS); // Look up table for bit fields of 2^(i/32) for i = 0..31, generated by Sollya // with: // > for i from 0 to 31 do printdouble(round(2^(i/32), D, RN)); static constexpr int64_t EXP_2_MID[1 << MID_BITS] = { 0x3ff0000000000000, 0x3ff059b0d3158574, 0x3ff0b5586cf9890f, 0x3ff11301d0125b51, 0x3ff172b83c7d517b, 0x3ff1d4873168b9aa, 0x3ff2387a6e756238, 0x3ff29e9df51fdee1, 0x3ff306fe0a31b715, 0x3ff371a7373aa9cb, 0x3ff3dea64c123422, 0x3ff44e086061892d, 0x3ff4bfdad5362a27, 0x3ff5342b569d4f82, 0x3ff5ab07dd485429, 0x3ff6247eb03a5585, 0x3ff6a09e667f3bcd, 0x3ff71f75e8ec5f74, 0x3ff7a11473eb0187, 0x3ff82589994cce13, 0x3ff8ace5422aa0db, 0x3ff93737b0cdc5e5, 0x3ff9c49182a3f090, 0x3ffa5503b23e255d, 0x3ffae89f995ad3ad, 0x3ffb7f76f2fb5e47, 0x3ffc199bdd85529c, 0x3ffcb720dcef9069, 0x3ffd5818dcfba487, 0x3ffdfc97337b9b5f, 0x3ffea4afa2a490da, 0x3fff50765b6e4540, }; // Approximating e^dx with degree-5 minimax polynomial generated by Sollya: // > Q = fpminimax(expm1(x)/x, 4, [|1, D...|], [-log(2)/64, log(2)/64]); // Then: // e^dx ~ P(dx) = 1 + dx + COEFFS[0] * dx^2 + ... + COEFFS[3] * dx^5. static constexpr double COEFFS[4] = { 0x1.ffffffffe5bc8p-2, 0x1.555555555cd67p-3, 0x1.5555c2a9b48b4p-5, 0x1.11112a0e34bdbp-7}; LIBC_INLINE static double powb_lo(double dx) { using fputil::multiply_add; double dx2 = dx * dx; double c0 = 1.0 + dx; // c1 = COEFFS[0] + COEFFS[1] * dx double c1 = multiply_add(dx, ExpBase::COEFFS[1], ExpBase::COEFFS[0]); // c2 = COEFFS[2] + COEFFS[3] * dx double c2 = multiply_add(dx, ExpBase::COEFFS[3], ExpBase::COEFFS[2]); // r = c4 + c5 * dx^4 // = 1 + dx + COEFFS[0] * dx^2 + ... + COEFFS[5] * dx^7 return fputil::polyeval(dx2, c0, c1, c2); } }; struct Exp10Base : public ExpBase { // log2(10) * 2^5 static constexpr double LOG2_B = 0x1.a934f0979a371p1 * (1 << MID_BITS); // High and low parts of -log10(2) * 2^(-5). // Notice that since |x * log2(10)| < 150: // |k| = |round(x * log2(10) * 2^5)| < 2^8 * 2^5 = 2^13 // So when the FMA instructions are not available, in order for the product // k * M_LOGB_2_HI // to be exact, we only store the high part of log10(2) up to 38 bits // (= 53 - 15) of precision. // It is generated by Sollya with: // > round(log10(2), 44, RN); static constexpr double M_LOGB_2_HI = -0x1.34413509f8p-2 / (1 << MID_BITS); // > round(log10(2) - 0x1.34413509f8p-2, D, RN); static constexpr double M_LOGB_2_LO = 0x1.80433b83b532ap-44 / (1 << MID_BITS); // Approximating 10^dx with degree-5 minimax polynomial generated by Sollya: // > Q = fpminimax((10^x - 1)/x, 4, [|D...|], [-log10(2)/2^6, log10(2)/2^6]); // Then: // 10^dx ~ P(dx) = 1 + COEFFS[0] * dx + ... + COEFFS[4] * dx^5. static constexpr double COEFFS[5] = {0x1.26bb1bbb55515p1, 0x1.53524c73bd3eap1, 0x1.0470591dff149p1, 0x1.2bd7c0a9fbc4dp0, 0x1.1429e74a98f43p-1}; static double powb_lo(double dx) { using fputil::multiply_add; double dx2 = dx * dx; // c0 = 1 + COEFFS[0] * dx double c0 = multiply_add(dx, Exp10Base::COEFFS[0], 1.0); // c1 = COEFFS[1] + COEFFS[2] * dx double c1 = multiply_add(dx, Exp10Base::COEFFS[2], Exp10Base::COEFFS[1]); // c2 = COEFFS[3] + COEFFS[4] * dx double c2 = multiply_add(dx, Exp10Base::COEFFS[4], Exp10Base::COEFFS[3]); // r = c0 + dx^2 * (c1 + c2 * dx^2) // = c0 + c1 * dx^2 + c2 * dx^4 // = 1 + COEFFS[0] * dx + ... + COEFFS[4] * dx^5. return fputil::polyeval(dx2, c0, c1, c2); } }; constexpr int LOG_P1_BITS = 6; constexpr int LOG_P1_SIZE = 1 << LOG_P1_BITS; // N[Table[Log[2, 1 + x], {x, 0/64, 63/64, 1/64}], 40] extern const double LOG_P1_LOG2[LOG_P1_SIZE]; // N[Table[1/(1 + x), {x, 0/64, 63/64, 1/64}], 40] extern const double LOG_P1_1_OVER[LOG_P1_SIZE]; // Taylor series expansion for Log[2, 1 + x] splitted to EVEN AND ODD numbers // K_LOG2_ODD starts from x^3 extern const double K_LOG2_ODD[4]; extern const double K_LOG2_EVEN[4]; // Output of range reduction for exp_b: (2^(mid + hi), lo) // where: // b^x = 2^(mid + hi) * b^lo struct exp_b_reduc_t { double mh; // 2^(mid + hi) double lo; }; // The function correctly calculates b^x value with at least float precision // in a limited range. // Range reduction: // b^x = 2^(hi + mid) * b^lo // where: // x = (hi + mid) * log_b(2) + lo // hi is an integer, // 0 <= mid * 2^MID_BITS < 2^MID_BITS is an integer // -2^(-MID_BITS - 1) <= lo * log2(b) <= 2^(-MID_BITS - 1) // Base class needs to provide the following constants: // - MID_BITS : number of bits after decimal points used for mid // - MID_MASK : 2^MID_BITS - 1, mask to extract mid bits // - LOG2_B : log2(b) * 2^MID_BITS for scaling // - M_LOGB_2_HI : high part of -log_b(2) * 2^(-MID_BITS) // - M_LOGB_2_LO : low part of -log_b(2) * 2^(-MID_BITS) // - EXP_2_MID : look up table for bit fields of 2^mid // Return: // { 2^(hi + mid), lo } template LIBC_INLINE exp_b_reduc_t exp_b_range_reduc(float x) { double xd = static_cast(x); // kd = round((hi + mid) * log2(b) * 2^MID_BITS) double kd = fputil::nearest_integer(Base::LOG2_B * xd); // k = round((hi + mid) * log2(b) * 2^MID_BITS) int k = static_cast(kd); // hi = floor(kd * 2^(-MID_BITS)) // exp_hi = shift hi to the exponent field of double precision. uint64_t exp_hi = static_cast(k >> Base::MID_BITS) << fputil::FPBits::FRACTION_LEN; // mh = 2^hi * 2^mid // mh_bits = bit field of mh uint64_t mh_bits = Base::EXP_2_MID[k & Base::MID_MASK] + exp_hi; double mh = fputil::FPBits(mh_bits).get_val(); // dx = lo = x - (hi + mid) * log(2) double dx = fputil::multiply_add( kd, Base::M_LOGB_2_LO, fputil::multiply_add(kd, Base::M_LOGB_2_HI, xd)); return {mh, dx}; } // The function correctly calculates sinh(x) and cosh(x) by calculating exp(x) // and exp(-x) simultaneously. // To compute e^x, we perform the following range // reduction: find hi, mid, lo such that: // x = (hi + mid) * log(2) + lo, in which // hi is an integer, // 0 <= mid * 2^5 < 32 is an integer // -2^(-6) <= lo * log2(e) <= 2^-6. // In particular, // hi + mid = round(x * log2(e) * 2^5) * 2^(-5). // Then, // e^x = 2^(hi + mid) * e^lo = 2^hi * 2^mid * e^lo. // 2^mid is stored in the lookup table of 32 elements. // e^lo is computed using a degree-5 minimax polynomial // generated by Sollya: // e^lo ~ P(lo) = 1 + lo + c2 * lo^2 + ... + c5 * lo^5 // = (1 + c2*lo^2 + c4*lo^4) + lo * (1 + c3*lo^2 + c5*lo^4) // = P_even + lo * P_odd // We perform 2^hi * 2^mid by simply add hi to the exponent field // of 2^mid. // To compute e^(-x), notice that: // e^(-x) = 2^(-(hi + mid)) * e^(-lo) // ~ 2^(-(hi + mid)) * P(-lo) // = 2^(-(hi + mid)) * (P_even - lo * P_odd) // So: // sinh(x) = (e^x - e^(-x)) / 2 // ~ 0.5 * (2^(hi + mid) * (P_even + lo * P_odd) - // 2^(-(hi + mid)) * (P_even - lo * P_odd)) // = 0.5 * (P_even * (2^(hi + mid) - 2^(-(hi + mid))) + // lo * P_odd * (2^(hi + mid) + 2^(-(hi + mid)))) // And similarly: // cosh(x) = (e^x + e^(-x)) / 2 // ~ 0.5 * (P_even * (2^(hi + mid) + 2^(-(hi + mid))) + // lo * P_odd * (2^(hi + mid) - 2^(-(hi + mid)))) // The main point of these formulas is that the expensive part of calculating // the polynomials approximating lower parts of e^(x) and e^(-x) are shared // and only done once. template LIBC_INLINE double exp_pm_eval(float x) { double xd = static_cast(x); // kd = round(x * log2(e) * 2^5) // k_p = round(x * log2(e) * 2^5) // k_m = round(-x * log2(e) * 2^5) double kd; int k_p, k_m; #ifdef LIBC_TARGET_CPU_HAS_NEAREST_INT kd = fputil::nearest_integer(ExpBase::LOG2_B * xd); k_p = static_cast(kd); k_m = -k_p; #else constexpr double HALF_WAY[2] = {0.5, -0.5}; k_p = static_cast( fputil::multiply_add(xd, ExpBase::LOG2_B, HALF_WAY[x < 0.0f])); k_m = -k_p; kd = static_cast(k_p); #endif // LIBC_TARGET_CPU_HAS_NEAREST_INT // hi = floor(kf * 2^(-5)) // exp_hi = shift hi to the exponent field of double precision. int64_t exp_hi_p = static_cast((k_p >> ExpBase::MID_BITS)) << fputil::FPBits::FRACTION_LEN; int64_t exp_hi_m = static_cast((k_m >> ExpBase::MID_BITS)) << fputil::FPBits::FRACTION_LEN; // mh_p = 2^(hi + mid) // mh_m = 2^(-(hi + mid)) // mh_bits_* = bit field of mh_* int64_t mh_bits_p = ExpBase::EXP_2_MID[k_p & ExpBase::MID_MASK] + exp_hi_p; int64_t mh_bits_m = ExpBase::EXP_2_MID[k_m & ExpBase::MID_MASK] + exp_hi_m; double mh_p = fputil::FPBits(uint64_t(mh_bits_p)).get_val(); double mh_m = fputil::FPBits(uint64_t(mh_bits_m)).get_val(); // mh_sum = 2^(hi + mid) + 2^(-(hi + mid)) double mh_sum = mh_p + mh_m; // mh_diff = 2^(hi + mid) - 2^(-(hi + mid)) double mh_diff = mh_p - mh_m; // dx = lo = x - (hi + mid) * log(2) double dx = fputil::multiply_add(kd, ExpBase::M_LOGB_2_LO, fputil::multiply_add(kd, ExpBase::M_LOGB_2_HI, xd)); double dx2 = dx * dx; // c0 = 1 + COEFFS[0] * lo^2 // P_even = (1 + COEFFS[0] * lo^2 + COEFFS[2] * lo^4) / 2 double p_even = fputil::polyeval(dx2, 0.5, ExpBase::COEFFS[0] * 0.5, ExpBase::COEFFS[2] * 0.5); // P_odd = (1 + COEFFS[1] * lo^2 + COEFFS[3] * lo^4) / 2 double p_odd = fputil::polyeval(dx2, 0.5, ExpBase::COEFFS[1] * 0.5, ExpBase::COEFFS[3] * 0.5); double r; if constexpr (is_sinh) r = fputil::multiply_add(dx * mh_sum, p_odd, p_even * mh_diff); else r = fputil::multiply_add(dx * mh_diff, p_odd, p_even * mh_sum); return r; } // x should be positive, normal finite value LIBC_INLINE static double log2_eval(double x) { using FPB = fputil::FPBits; FPB bs(x); double result = 0; result += bs.get_exponent(); int p1 = (bs.get_mantissa() >> (FPB::FRACTION_LEN - LOG_P1_BITS)) & (LOG_P1_SIZE - 1); bs.set_uintval(bs.uintval() & (FPB::FRACTION_MASK >> LOG_P1_BITS)); bs.set_biased_exponent(FPB::EXP_BIAS); double dx = (bs.get_val() - 1.0) * LOG_P1_1_OVER[p1]; // Taylor series for log(2,1+x) double c1 = fputil::multiply_add(dx, K_LOG2_ODD[0], K_LOG2_EVEN[0]); double c2 = fputil::multiply_add(dx, K_LOG2_ODD[1], K_LOG2_EVEN[1]); double c3 = fputil::multiply_add(dx, K_LOG2_ODD[2], K_LOG2_EVEN[2]); double c4 = fputil::multiply_add(dx, K_LOG2_ODD[3], K_LOG2_EVEN[3]); // c0 = dx * (1.0 / ln(2)) + LOG_P1_LOG2[p1] double c0 = fputil::multiply_add(dx, 0x1.71547652b82fep+0, LOG_P1_LOG2[p1]); result += LIBC_NAMESPACE::fputil::polyeval(dx * dx, c0, c1, c2, c3, c4); return result; } // x should be positive, normal finite value LIBC_INLINE static double log_eval(double x) { // For x = 2^ex * (1 + mx) // log(x) = ex * log(2) + log(1 + mx) using FPB = fputil::FPBits; FPB bs(x); double ex = static_cast(bs.get_exponent()); // p1 is the leading 7 bits of mx, i.e. // p1 * 2^(-7) <= m_x < (p1 + 1) * 2^(-7). int p1 = static_cast(bs.get_mantissa() >> (FPB::FRACTION_LEN - 7)); // Set bs to (1 + (mx - p1*2^(-7)) bs.set_uintval(bs.uintval() & (FPB::FRACTION_MASK >> 7)); bs.set_biased_exponent(FPB::EXP_BIAS); // dx = (mx - p1*2^(-7)) / (1 + p1*2^(-7)). double dx = (bs.get_val() - 1.0) * ONE_OVER_F[p1]; // Minimax polynomial of log(1 + dx) generated by Sollya with: // > P = fpminimax(log(1 + x)/x, 6, [|D...|], [0, 2^-7]); const double COEFFS[6] = {-0x1.ffffffffffffcp-2, 0x1.5555555552ddep-2, -0x1.ffffffefe562dp-3, 0x1.9999817d3a50fp-3, -0x1.554317b3f67a5p-3, 0x1.1dc5c45e09c18p-3}; double dx2 = dx * dx; double c1 = fputil::multiply_add(dx, COEFFS[1], COEFFS[0]); double c2 = fputil::multiply_add(dx, COEFFS[3], COEFFS[2]); double c3 = fputil::multiply_add(dx, COEFFS[5], COEFFS[4]); double p = fputil::polyeval(dx2, dx, c1, c2, c3); double result = fputil::multiply_add(ex, /*log(2)*/ 0x1.62e42fefa39efp-1, LOG_F[p1] + p); return result; } // Rounding tests for 2^hi * (mid + lo) when the output might be denormal. We // assume further that 1 <= mid < 2, mid + lo < 2, and |lo| << mid. // Notice that, if 0 < x < 2^-1022, // double(2^-1022 + x) - 2^-1022 = double(x). // So if we scale x up by 2^1022, we can use // double(1.0 + 2^1022 * x) - 1.0 to test how x is rounded in denormal range. LIBC_INLINE cpp::optional ziv_test_denorm(int hi, double mid, double lo, double err) { using FPBits = typename fputil::FPBits; // Scaling factor = 1/(min normal number) = 2^1022 int64_t exp_hi = static_cast(hi + 1022) << FPBits::FRACTION_LEN; double mid_hi = cpp::bit_cast(exp_hi + cpp::bit_cast(mid)); double lo_scaled = (lo != 0.0) ? cpp::bit_cast(exp_hi + cpp::bit_cast(lo)) : 0.0; double extra_factor = 0.0; uint64_t scale_down = 0x3FE0'0000'0000'0000; // 1022 in the exponent field. // Result is denormal if (mid_hi + lo_scale < 1.0). if ((1.0 - mid_hi) > lo_scaled) { // Extra rounding step is needed, which adds more rounding errors. err += 0x1.0p-52; extra_factor = 1.0; scale_down = 0x3FF0'0000'0000'0000; // 1023 in the exponent field. } double err_scaled = cpp::bit_cast(exp_hi + cpp::bit_cast(err)); double lo_u = lo_scaled + err_scaled; double lo_l = lo_scaled - err_scaled; // By adding 1.0, the results will have similar rounding points as denormal // outputs. double upper = extra_factor + (mid_hi + lo_u); double lower = extra_factor + (mid_hi + lo_l); if (LIBC_LIKELY(upper == lower)) { return cpp::bit_cast(cpp::bit_cast(upper) - scale_down); } return cpp::nullopt; } } // namespace LIBC_NAMESPACE_DECL #endif // LLVM_LIBC_SRC_MATH_GENERIC_EXPLOGXF_H