1 //===-- Implementation of hypotf function ---------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 #include "src/math/hypotf.h" 9 #include "src/__support/FPUtil/FEnvImpl.h" 10 #include "src/__support/FPUtil/FPBits.h" 11 #include "src/__support/FPUtil/double_double.h" 12 #include "src/__support/FPUtil/multiply_add.h" 13 #include "src/__support/FPUtil/sqrt.h" 14 #include "src/__support/common.h" 15 #include "src/__support/macros/config.h" 16 #include "src/__support/macros/optimization.h" 17 18 namespace LIBC_NAMESPACE_DECL { 19 20 LLVM_LIBC_FUNCTION(float, hypotf, (float x, float y)) { 21 using DoubleBits = fputil::FPBits<double>; 22 using FPBits = fputil::FPBits<float>; 23 24 FPBits x_abs = FPBits(x).abs(); 25 FPBits y_abs = FPBits(y).abs(); 26 27 bool x_abs_larger = x_abs.uintval() >= y_abs.uintval(); 28 29 FPBits a_bits = x_abs_larger ? x_abs : y_abs; 30 FPBits b_bits = x_abs_larger ? y_abs : x_abs; 31 32 uint32_t a_u = a_bits.uintval(); 33 uint32_t b_u = b_bits.uintval(); 34 35 // Note: replacing `a_u >= FPBits::EXP_MASK` with `a_bits.is_inf_or_nan()` 36 // generates extra exponent bit masking instructions on x86-64. 37 if (LIBC_UNLIKELY(a_u >= FPBits::EXP_MASK)) { 38 // x or y is inf or nan 39 if (a_bits.is_signaling_nan() || b_bits.is_signaling_nan()) { 40 fputil::raise_except_if_required(FE_INVALID); 41 return FPBits::quiet_nan().get_val(); 42 } 43 if (a_bits.is_inf() || b_bits.is_inf()) 44 return FPBits::inf().get_val(); 45 return a_bits.get_val(); 46 } 47 48 if (LIBC_UNLIKELY(a_u - b_u >= 49 static_cast<uint32_t>((FPBits::FRACTION_LEN + 2) 50 << FPBits::FRACTION_LEN))) 51 return x_abs.get_val() + y_abs.get_val(); 52 53 double ad = static_cast<double>(a_bits.get_val()); 54 double bd = static_cast<double>(b_bits.get_val()); 55 56 // These squares are exact. 57 double a_sq = ad * ad; 58 #ifdef LIBC_TARGET_CPU_HAS_FMA 59 double sum_sq = fputil::multiply_add(bd, bd, a_sq); 60 #else 61 double b_sq = bd * bd; 62 double sum_sq = a_sq + b_sq; 63 #endif 64 65 // Take sqrt in double precision. 66 DoubleBits result(fputil::sqrt<double>(sum_sq)); 67 uint64_t r_u = result.uintval(); 68 69 // If any of the sticky bits of the result are non-zero, except the LSB, then 70 // the rounded result is correct. 71 if (LIBC_UNLIKELY(((r_u + 1) & 0x0000'0000'0FFF'FFFE) == 0)) { 72 double r_d = result.get_val(); 73 74 // Perform rounding correction. 75 #ifdef LIBC_TARGET_CPU_HAS_FMA 76 double sum_sq_lo = fputil::multiply_add(bd, bd, a_sq - sum_sq); 77 double err = sum_sq_lo - fputil::multiply_add(r_d, r_d, -sum_sq); 78 #else 79 fputil::DoubleDouble r_sq = fputil::exact_mult(r_d, r_d); 80 double sum_sq_lo = b_sq - (sum_sq - a_sq); 81 double err = (sum_sq - r_sq.hi) + (sum_sq_lo - r_sq.lo); 82 #endif 83 84 if (err > 0) { 85 r_u |= 1; 86 } else if ((err < 0) && (r_u & 1) == 0) { 87 r_u -= 1; 88 } else if ((r_u & 0x0000'0000'1FFF'FFFF) == 0) { 89 // The rounded result is exact. 90 fputil::clear_except_if_required(FE_INEXACT); 91 } 92 return static_cast<float>(DoubleBits(r_u).get_val()); 93 } 94 95 return static_cast<float>(result.get_val()); 96 } 97 98 } // namespace LIBC_NAMESPACE_DECL 99