xref: /aosp_15_r20/external/llvm-libc/src/math/generic/hypotf.cpp (revision 71db0c75aadcf003ffe3238005f61d7618a3fead)
1*71db0c75SAndroid Build Coastguard Worker //===-- Implementation of hypotf function ---------------------------------===//
2*71db0c75SAndroid Build Coastguard Worker //
3*71db0c75SAndroid Build Coastguard Worker // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*71db0c75SAndroid Build Coastguard Worker // See https://llvm.org/LICENSE.txt for license information.
5*71db0c75SAndroid Build Coastguard Worker // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*71db0c75SAndroid Build Coastguard Worker //
7*71db0c75SAndroid Build Coastguard Worker //===----------------------------------------------------------------------===//
8*71db0c75SAndroid Build Coastguard Worker #include "src/math/hypotf.h"
9*71db0c75SAndroid Build Coastguard Worker #include "src/__support/FPUtil/FEnvImpl.h"
10*71db0c75SAndroid Build Coastguard Worker #include "src/__support/FPUtil/FPBits.h"
11*71db0c75SAndroid Build Coastguard Worker #include "src/__support/FPUtil/double_double.h"
12*71db0c75SAndroid Build Coastguard Worker #include "src/__support/FPUtil/multiply_add.h"
13*71db0c75SAndroid Build Coastguard Worker #include "src/__support/FPUtil/sqrt.h"
14*71db0c75SAndroid Build Coastguard Worker #include "src/__support/common.h"
15*71db0c75SAndroid Build Coastguard Worker #include "src/__support/macros/config.h"
16*71db0c75SAndroid Build Coastguard Worker #include "src/__support/macros/optimization.h"
17*71db0c75SAndroid Build Coastguard Worker 
18*71db0c75SAndroid Build Coastguard Worker namespace LIBC_NAMESPACE_DECL {
19*71db0c75SAndroid Build Coastguard Worker 
20*71db0c75SAndroid Build Coastguard Worker LLVM_LIBC_FUNCTION(float, hypotf, (float x, float y)) {
21*71db0c75SAndroid Build Coastguard Worker   using DoubleBits = fputil::FPBits<double>;
22*71db0c75SAndroid Build Coastguard Worker   using FPBits = fputil::FPBits<float>;
23*71db0c75SAndroid Build Coastguard Worker 
24*71db0c75SAndroid Build Coastguard Worker   FPBits x_abs = FPBits(x).abs();
25*71db0c75SAndroid Build Coastguard Worker   FPBits y_abs = FPBits(y).abs();
26*71db0c75SAndroid Build Coastguard Worker 
27*71db0c75SAndroid Build Coastguard Worker   bool x_abs_larger = x_abs.uintval() >= y_abs.uintval();
28*71db0c75SAndroid Build Coastguard Worker 
29*71db0c75SAndroid Build Coastguard Worker   FPBits a_bits = x_abs_larger ? x_abs : y_abs;
30*71db0c75SAndroid Build Coastguard Worker   FPBits b_bits = x_abs_larger ? y_abs : x_abs;
31*71db0c75SAndroid Build Coastguard Worker 
32*71db0c75SAndroid Build Coastguard Worker   uint32_t a_u = a_bits.uintval();
33*71db0c75SAndroid Build Coastguard Worker   uint32_t b_u = b_bits.uintval();
34*71db0c75SAndroid Build Coastguard Worker 
35*71db0c75SAndroid Build Coastguard Worker   // Note: replacing `a_u >= FPBits::EXP_MASK` with `a_bits.is_inf_or_nan()`
36*71db0c75SAndroid Build Coastguard Worker   // generates extra exponent bit masking instructions on x86-64.
37*71db0c75SAndroid Build Coastguard Worker   if (LIBC_UNLIKELY(a_u >= FPBits::EXP_MASK)) {
38*71db0c75SAndroid Build Coastguard Worker     // x or y is inf or nan
39*71db0c75SAndroid Build Coastguard Worker     if (a_bits.is_signaling_nan() || b_bits.is_signaling_nan()) {
40*71db0c75SAndroid Build Coastguard Worker       fputil::raise_except_if_required(FE_INVALID);
41*71db0c75SAndroid Build Coastguard Worker       return FPBits::quiet_nan().get_val();
42*71db0c75SAndroid Build Coastguard Worker     }
43*71db0c75SAndroid Build Coastguard Worker     if (a_bits.is_inf() || b_bits.is_inf())
44*71db0c75SAndroid Build Coastguard Worker       return FPBits::inf().get_val();
45*71db0c75SAndroid Build Coastguard Worker     return a_bits.get_val();
46*71db0c75SAndroid Build Coastguard Worker   }
47*71db0c75SAndroid Build Coastguard Worker 
48*71db0c75SAndroid Build Coastguard Worker   if (LIBC_UNLIKELY(a_u - b_u >=
49*71db0c75SAndroid Build Coastguard Worker                     static_cast<uint32_t>((FPBits::FRACTION_LEN + 2)
50*71db0c75SAndroid Build Coastguard Worker                                           << FPBits::FRACTION_LEN)))
51*71db0c75SAndroid Build Coastguard Worker     return x_abs.get_val() + y_abs.get_val();
52*71db0c75SAndroid Build Coastguard Worker 
53*71db0c75SAndroid Build Coastguard Worker   double ad = static_cast<double>(a_bits.get_val());
54*71db0c75SAndroid Build Coastguard Worker   double bd = static_cast<double>(b_bits.get_val());
55*71db0c75SAndroid Build Coastguard Worker 
56*71db0c75SAndroid Build Coastguard Worker   // These squares are exact.
57*71db0c75SAndroid Build Coastguard Worker   double a_sq = ad * ad;
58*71db0c75SAndroid Build Coastguard Worker #ifdef LIBC_TARGET_CPU_HAS_FMA
59*71db0c75SAndroid Build Coastguard Worker   double sum_sq = fputil::multiply_add(bd, bd, a_sq);
60*71db0c75SAndroid Build Coastguard Worker #else
61*71db0c75SAndroid Build Coastguard Worker   double b_sq = bd * bd;
62*71db0c75SAndroid Build Coastguard Worker   double sum_sq = a_sq + b_sq;
63*71db0c75SAndroid Build Coastguard Worker #endif
64*71db0c75SAndroid Build Coastguard Worker 
65*71db0c75SAndroid Build Coastguard Worker   // Take sqrt in double precision.
66*71db0c75SAndroid Build Coastguard Worker   DoubleBits result(fputil::sqrt<double>(sum_sq));
67*71db0c75SAndroid Build Coastguard Worker   uint64_t r_u = result.uintval();
68*71db0c75SAndroid Build Coastguard Worker 
69*71db0c75SAndroid Build Coastguard Worker   // If any of the sticky bits of the result are non-zero, except the LSB, then
70*71db0c75SAndroid Build Coastguard Worker   // the rounded result is correct.
71*71db0c75SAndroid Build Coastguard Worker   if (LIBC_UNLIKELY(((r_u + 1) & 0x0000'0000'0FFF'FFFE) == 0)) {
72*71db0c75SAndroid Build Coastguard Worker     double r_d = result.get_val();
73*71db0c75SAndroid Build Coastguard Worker 
74*71db0c75SAndroid Build Coastguard Worker     // Perform rounding correction.
75*71db0c75SAndroid Build Coastguard Worker #ifdef LIBC_TARGET_CPU_HAS_FMA
76*71db0c75SAndroid Build Coastguard Worker     double sum_sq_lo = fputil::multiply_add(bd, bd, a_sq - sum_sq);
77*71db0c75SAndroid Build Coastguard Worker     double err = sum_sq_lo - fputil::multiply_add(r_d, r_d, -sum_sq);
78*71db0c75SAndroid Build Coastguard Worker #else
79*71db0c75SAndroid Build Coastguard Worker     fputil::DoubleDouble r_sq = fputil::exact_mult(r_d, r_d);
80*71db0c75SAndroid Build Coastguard Worker     double sum_sq_lo = b_sq - (sum_sq - a_sq);
81*71db0c75SAndroid Build Coastguard Worker     double err = (sum_sq - r_sq.hi) + (sum_sq_lo - r_sq.lo);
82*71db0c75SAndroid Build Coastguard Worker #endif
83*71db0c75SAndroid Build Coastguard Worker 
84*71db0c75SAndroid Build Coastguard Worker     if (err > 0) {
85*71db0c75SAndroid Build Coastguard Worker       r_u |= 1;
86*71db0c75SAndroid Build Coastguard Worker     } else if ((err < 0) && (r_u & 1) == 0) {
87*71db0c75SAndroid Build Coastguard Worker       r_u -= 1;
88*71db0c75SAndroid Build Coastguard Worker     } else if ((r_u & 0x0000'0000'1FFF'FFFF) == 0) {
89*71db0c75SAndroid Build Coastguard Worker       // The rounded result is exact.
90*71db0c75SAndroid Build Coastguard Worker       fputil::clear_except_if_required(FE_INEXACT);
91*71db0c75SAndroid Build Coastguard Worker     }
92*71db0c75SAndroid Build Coastguard Worker     return static_cast<float>(DoubleBits(r_u).get_val());
93*71db0c75SAndroid Build Coastguard Worker   }
94*71db0c75SAndroid Build Coastguard Worker 
95*71db0c75SAndroid Build Coastguard Worker   return static_cast<float>(result.get_val());
96*71db0c75SAndroid Build Coastguard Worker }
97*71db0c75SAndroid Build Coastguard Worker 
98*71db0c75SAndroid Build Coastguard Worker } // namespace LIBC_NAMESPACE_DECL
99