1*71db0c75SAndroid Build Coastguard Worker //===-- Implementation of hypotf function ---------------------------------===// 2*71db0c75SAndroid Build Coastguard Worker // 3*71db0c75SAndroid Build Coastguard Worker // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*71db0c75SAndroid Build Coastguard Worker // See https://llvm.org/LICENSE.txt for license information. 5*71db0c75SAndroid Build Coastguard Worker // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*71db0c75SAndroid Build Coastguard Worker // 7*71db0c75SAndroid Build Coastguard Worker //===----------------------------------------------------------------------===// 8*71db0c75SAndroid Build Coastguard Worker #include "src/math/hypotf.h" 9*71db0c75SAndroid Build Coastguard Worker #include "src/__support/FPUtil/FEnvImpl.h" 10*71db0c75SAndroid Build Coastguard Worker #include "src/__support/FPUtil/FPBits.h" 11*71db0c75SAndroid Build Coastguard Worker #include "src/__support/FPUtil/double_double.h" 12*71db0c75SAndroid Build Coastguard Worker #include "src/__support/FPUtil/multiply_add.h" 13*71db0c75SAndroid Build Coastguard Worker #include "src/__support/FPUtil/sqrt.h" 14*71db0c75SAndroid Build Coastguard Worker #include "src/__support/common.h" 15*71db0c75SAndroid Build Coastguard Worker #include "src/__support/macros/config.h" 16*71db0c75SAndroid Build Coastguard Worker #include "src/__support/macros/optimization.h" 17*71db0c75SAndroid Build Coastguard Worker 18*71db0c75SAndroid Build Coastguard Worker namespace LIBC_NAMESPACE_DECL { 19*71db0c75SAndroid Build Coastguard Worker 20*71db0c75SAndroid Build Coastguard Worker LLVM_LIBC_FUNCTION(float, hypotf, (float x, float y)) { 21*71db0c75SAndroid Build Coastguard Worker using DoubleBits = fputil::FPBits<double>; 22*71db0c75SAndroid Build Coastguard Worker using FPBits = fputil::FPBits<float>; 23*71db0c75SAndroid Build Coastguard Worker 24*71db0c75SAndroid Build Coastguard Worker FPBits x_abs = FPBits(x).abs(); 25*71db0c75SAndroid Build Coastguard Worker FPBits y_abs = FPBits(y).abs(); 26*71db0c75SAndroid Build Coastguard Worker 27*71db0c75SAndroid Build Coastguard Worker bool x_abs_larger = x_abs.uintval() >= y_abs.uintval(); 28*71db0c75SAndroid Build Coastguard Worker 29*71db0c75SAndroid Build Coastguard Worker FPBits a_bits = x_abs_larger ? x_abs : y_abs; 30*71db0c75SAndroid Build Coastguard Worker FPBits b_bits = x_abs_larger ? y_abs : x_abs; 31*71db0c75SAndroid Build Coastguard Worker 32*71db0c75SAndroid Build Coastguard Worker uint32_t a_u = a_bits.uintval(); 33*71db0c75SAndroid Build Coastguard Worker uint32_t b_u = b_bits.uintval(); 34*71db0c75SAndroid Build Coastguard Worker 35*71db0c75SAndroid Build Coastguard Worker // Note: replacing `a_u >= FPBits::EXP_MASK` with `a_bits.is_inf_or_nan()` 36*71db0c75SAndroid Build Coastguard Worker // generates extra exponent bit masking instructions on x86-64. 37*71db0c75SAndroid Build Coastguard Worker if (LIBC_UNLIKELY(a_u >= FPBits::EXP_MASK)) { 38*71db0c75SAndroid Build Coastguard Worker // x or y is inf or nan 39*71db0c75SAndroid Build Coastguard Worker if (a_bits.is_signaling_nan() || b_bits.is_signaling_nan()) { 40*71db0c75SAndroid Build Coastguard Worker fputil::raise_except_if_required(FE_INVALID); 41*71db0c75SAndroid Build Coastguard Worker return FPBits::quiet_nan().get_val(); 42*71db0c75SAndroid Build Coastguard Worker } 43*71db0c75SAndroid Build Coastguard Worker if (a_bits.is_inf() || b_bits.is_inf()) 44*71db0c75SAndroid Build Coastguard Worker return FPBits::inf().get_val(); 45*71db0c75SAndroid Build Coastguard Worker return a_bits.get_val(); 46*71db0c75SAndroid Build Coastguard Worker } 47*71db0c75SAndroid Build Coastguard Worker 48*71db0c75SAndroid Build Coastguard Worker if (LIBC_UNLIKELY(a_u - b_u >= 49*71db0c75SAndroid Build Coastguard Worker static_cast<uint32_t>((FPBits::FRACTION_LEN + 2) 50*71db0c75SAndroid Build Coastguard Worker << FPBits::FRACTION_LEN))) 51*71db0c75SAndroid Build Coastguard Worker return x_abs.get_val() + y_abs.get_val(); 52*71db0c75SAndroid Build Coastguard Worker 53*71db0c75SAndroid Build Coastguard Worker double ad = static_cast<double>(a_bits.get_val()); 54*71db0c75SAndroid Build Coastguard Worker double bd = static_cast<double>(b_bits.get_val()); 55*71db0c75SAndroid Build Coastguard Worker 56*71db0c75SAndroid Build Coastguard Worker // These squares are exact. 57*71db0c75SAndroid Build Coastguard Worker double a_sq = ad * ad; 58*71db0c75SAndroid Build Coastguard Worker #ifdef LIBC_TARGET_CPU_HAS_FMA 59*71db0c75SAndroid Build Coastguard Worker double sum_sq = fputil::multiply_add(bd, bd, a_sq); 60*71db0c75SAndroid Build Coastguard Worker #else 61*71db0c75SAndroid Build Coastguard Worker double b_sq = bd * bd; 62*71db0c75SAndroid Build Coastguard Worker double sum_sq = a_sq + b_sq; 63*71db0c75SAndroid Build Coastguard Worker #endif 64*71db0c75SAndroid Build Coastguard Worker 65*71db0c75SAndroid Build Coastguard Worker // Take sqrt in double precision. 66*71db0c75SAndroid Build Coastguard Worker DoubleBits result(fputil::sqrt<double>(sum_sq)); 67*71db0c75SAndroid Build Coastguard Worker uint64_t r_u = result.uintval(); 68*71db0c75SAndroid Build Coastguard Worker 69*71db0c75SAndroid Build Coastguard Worker // If any of the sticky bits of the result are non-zero, except the LSB, then 70*71db0c75SAndroid Build Coastguard Worker // the rounded result is correct. 71*71db0c75SAndroid Build Coastguard Worker if (LIBC_UNLIKELY(((r_u + 1) & 0x0000'0000'0FFF'FFFE) == 0)) { 72*71db0c75SAndroid Build Coastguard Worker double r_d = result.get_val(); 73*71db0c75SAndroid Build Coastguard Worker 74*71db0c75SAndroid Build Coastguard Worker // Perform rounding correction. 75*71db0c75SAndroid Build Coastguard Worker #ifdef LIBC_TARGET_CPU_HAS_FMA 76*71db0c75SAndroid Build Coastguard Worker double sum_sq_lo = fputil::multiply_add(bd, bd, a_sq - sum_sq); 77*71db0c75SAndroid Build Coastguard Worker double err = sum_sq_lo - fputil::multiply_add(r_d, r_d, -sum_sq); 78*71db0c75SAndroid Build Coastguard Worker #else 79*71db0c75SAndroid Build Coastguard Worker fputil::DoubleDouble r_sq = fputil::exact_mult(r_d, r_d); 80*71db0c75SAndroid Build Coastguard Worker double sum_sq_lo = b_sq - (sum_sq - a_sq); 81*71db0c75SAndroid Build Coastguard Worker double err = (sum_sq - r_sq.hi) + (sum_sq_lo - r_sq.lo); 82*71db0c75SAndroid Build Coastguard Worker #endif 83*71db0c75SAndroid Build Coastguard Worker 84*71db0c75SAndroid Build Coastguard Worker if (err > 0) { 85*71db0c75SAndroid Build Coastguard Worker r_u |= 1; 86*71db0c75SAndroid Build Coastguard Worker } else if ((err < 0) && (r_u & 1) == 0) { 87*71db0c75SAndroid Build Coastguard Worker r_u -= 1; 88*71db0c75SAndroid Build Coastguard Worker } else if ((r_u & 0x0000'0000'1FFF'FFFF) == 0) { 89*71db0c75SAndroid Build Coastguard Worker // The rounded result is exact. 90*71db0c75SAndroid Build Coastguard Worker fputil::clear_except_if_required(FE_INEXACT); 91*71db0c75SAndroid Build Coastguard Worker } 92*71db0c75SAndroid Build Coastguard Worker return static_cast<float>(DoubleBits(r_u).get_val()); 93*71db0c75SAndroid Build Coastguard Worker } 94*71db0c75SAndroid Build Coastguard Worker 95*71db0c75SAndroid Build Coastguard Worker return static_cast<float>(result.get_val()); 96*71db0c75SAndroid Build Coastguard Worker } 97*71db0c75SAndroid Build Coastguard Worker 98*71db0c75SAndroid Build Coastguard Worker } // namespace LIBC_NAMESPACE_DECL 99