xref: /aosp_15_r20/external/llvm-libc/src/math/generic/cbrtf.cpp (revision 71db0c75aadcf003ffe3238005f61d7618a3fead)
1*71db0c75SAndroid Build Coastguard Worker //===-- Implementation of cbrtf function ----------------------------------===//
2*71db0c75SAndroid Build Coastguard Worker //
3*71db0c75SAndroid Build Coastguard Worker // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*71db0c75SAndroid Build Coastguard Worker // See https://llvm.org/LICENSE.txt for license information.
5*71db0c75SAndroid Build Coastguard Worker // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*71db0c75SAndroid Build Coastguard Worker //
7*71db0c75SAndroid Build Coastguard Worker //===----------------------------------------------------------------------===//
8*71db0c75SAndroid Build Coastguard Worker 
9*71db0c75SAndroid Build Coastguard Worker #include "src/math/cbrtf.h"
10*71db0c75SAndroid Build Coastguard Worker #include "hdr/fenv_macros.h"
11*71db0c75SAndroid Build Coastguard Worker #include "src/__support/FPUtil/FEnvImpl.h"
12*71db0c75SAndroid Build Coastguard Worker #include "src/__support/FPUtil/FPBits.h"
13*71db0c75SAndroid Build Coastguard Worker #include "src/__support/FPUtil/multiply_add.h"
14*71db0c75SAndroid Build Coastguard Worker #include "src/__support/common.h"
15*71db0c75SAndroid Build Coastguard Worker #include "src/__support/macros/config.h"
16*71db0c75SAndroid Build Coastguard Worker #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
17*71db0c75SAndroid Build Coastguard Worker 
18*71db0c75SAndroid Build Coastguard Worker namespace LIBC_NAMESPACE_DECL {
19*71db0c75SAndroid Build Coastguard Worker 
20*71db0c75SAndroid Build Coastguard Worker namespace {
21*71db0c75SAndroid Build Coastguard Worker 
22*71db0c75SAndroid Build Coastguard Worker // Look up table for 2^(i/3) for i = 0, 1, 2.
23*71db0c75SAndroid Build Coastguard Worker constexpr double CBRT2[3] = {1.0, 0x1.428a2f98d728bp0, 0x1.965fea53d6e3dp0};
24*71db0c75SAndroid Build Coastguard Worker 
25*71db0c75SAndroid Build Coastguard Worker // Degree-7 polynomials approximation of ((1 + x)^(1/3) - 1)/x for 0 <= x <= 1
26*71db0c75SAndroid Build Coastguard Worker // generated by Sollya with:
27*71db0c75SAndroid Build Coastguard Worker // > for i from 0 to 15 do {
28*71db0c75SAndroid Build Coastguard Worker //     P = fpminimax((1 + x)^(1/3) - 1)/x, 6, [|D...|], [i/16, (i + 1)/16]);
29*71db0c75SAndroid Build Coastguard Worker //     print("{", coeff(P, 0), ",", coeff(P, 1), ",", coeff(P, 2), ",",
30*71db0c75SAndroid Build Coastguard Worker //           coeff(P, 3), ",", coeff(P, 4), ",", coeff(P, 5), ",",
31*71db0c75SAndroid Build Coastguard Worker //           coeff(P, 6), "},");
32*71db0c75SAndroid Build Coastguard Worker // };
33*71db0c75SAndroid Build Coastguard Worker // Then (1 + x)^(1/3) ~ 1 + x * P(x).
34*71db0c75SAndroid Build Coastguard Worker constexpr double COEFFS[16][7] = {
35*71db0c75SAndroid Build Coastguard Worker     {0x1.55555555554ebp-2, -0x1.c71c71c678c0cp-4, 0x1.f9add2776de81p-5,
36*71db0c75SAndroid Build Coastguard Worker      -0x1.511e10aa964a7p-5, 0x1.ee44165937fa2p-6, -0x1.7c5c9e059345dp-6,
37*71db0c75SAndroid Build Coastguard Worker      0x1.047f75e0aff14p-6},
38*71db0c75SAndroid Build Coastguard Worker     {0x1.5555554d1149ap-2, -0x1.c71c676fcb5bp-4, 0x1.f9ab127dc57ebp-5,
39*71db0c75SAndroid Build Coastguard Worker      -0x1.50ea8fd1d4c15p-5, 0x1.e9d68f28ced43p-6, -0x1.60e0e1e661311p-6,
40*71db0c75SAndroid Build Coastguard Worker      0x1.716eca1d6e3bcp-7},
41*71db0c75SAndroid Build Coastguard Worker     {0x1.5555546377d45p-2, -0x1.c71bc1c6d49d2p-4, 0x1.f9924cc0ed24dp-5,
42*71db0c75SAndroid Build Coastguard Worker      -0x1.4fea3beb53b3bp-5, 0x1.de028a9a07b1bp-6, -0x1.3b090d2233524p-6,
43*71db0c75SAndroid Build Coastguard Worker      0x1.0aeca34893785p-7},
44*71db0c75SAndroid Build Coastguard Worker     {0x1.55554dce9f649p-2, -0x1.c7188b34b98f8p-4, 0x1.f93e1af34af49p-5,
45*71db0c75SAndroid Build Coastguard Worker      -0x1.4d9a06be75c63p-5, 0x1.cb943f4f68992p-6, -0x1.139a685a5e3c4p-6,
46*71db0c75SAndroid Build Coastguard Worker      0x1.88410674c6a5dp-8},
47*71db0c75SAndroid Build Coastguard Worker     {0x1.5555347d211c3p-2, -0x1.c70f2a4b1a5fap-4, 0x1.f88420e8602c3p-5,
48*71db0c75SAndroid Build Coastguard Worker      -0x1.49becfa4ed3ep-5, 0x1.b475cd9013162p-6, -0x1.dcfee1dd2f8efp-7,
49*71db0c75SAndroid Build Coastguard Worker      0x1.249bb51a1c498p-8},
50*71db0c75SAndroid Build Coastguard Worker     {0x1.5554f01b33dbap-2, -0x1.c6facb929dbf1p-4, 0x1.f73fb7861252ep-5,
51*71db0c75SAndroid Build Coastguard Worker      -0x1.4459a4a0071fap-5, 0x1.9a8df2b504fc2p-6, -0x1.9a7ce3006d06ep-7,
52*71db0c75SAndroid Build Coastguard Worker      0x1.ba9230918fa2ep-9},
53*71db0c75SAndroid Build Coastguard Worker     {0x1.55545c695db5fp-2, -0x1.c6d6089f20275p-4, 0x1.f556e0ea80efp-5,
54*71db0c75SAndroid Build Coastguard Worker      -0x1.3d91372d083f4p-5, 0x1.7f66cff331f4p-6, -0x1.606a562491737p-7,
55*71db0c75SAndroid Build Coastguard Worker      0x1.52e3e17c71069p-9},
56*71db0c75SAndroid Build Coastguard Worker     {0x1.55534a879232ap-2, -0x1.c69b836998b84p-4, 0x1.f2bb26dac0e4cp-5,
57*71db0c75SAndroid Build Coastguard Worker      -0x1.359eed43716d7p-5, 0x1.64218cd824fbcp-6, -0x1.2e703e2e091e8p-7,
58*71db0c75SAndroid Build Coastguard Worker      0x1.0677d9af6aad4p-9},
59*71db0c75SAndroid Build Coastguard Worker     {0x1.5551836bb5494p-2, -0x1.c64658c15353bp-4, 0x1.ef68517451a6ep-5,
60*71db0c75SAndroid Build Coastguard Worker      -0x1.2cc20a980dceep-5, 0x1.49843e0fad93ap-6, -0x1.03c59ccb68e54p-7,
61*71db0c75SAndroid Build Coastguard Worker      0x1.9ad325dc7adcbp-10},
62*71db0c75SAndroid Build Coastguard Worker     {0x1.554ecacb0d035p-2, -0x1.c5d2664026ffcp-4, 0x1.eb624796ba809p-5,
63*71db0c75SAndroid Build Coastguard Worker      -0x1.233803d19a535p-5, 0x1.300decb1c3c28p-6, -0x1.befe18031ec3dp-8,
64*71db0c75SAndroid Build Coastguard Worker      0x1.449f5ee175c69p-10},
65*71db0c75SAndroid Build Coastguard Worker     {0x1.554ae1f5ae815p-2, -0x1.c53c6b14ff6b2p-4, 0x1.e6b2d5127bb5bp-5,
66*71db0c75SAndroid Build Coastguard Worker      -0x1.19387336788a3p-5, 0x1.180955a6ab255p-6, -0x1.81696703ba369p-8,
67*71db0c75SAndroid Build Coastguard Worker      0x1.02cb36389bd79p-10},
68*71db0c75SAndroid Build Coastguard Worker     {0x1.55458a59f356ep-2, -0x1.c4820dd631ae9p-4, 0x1.e167af818bd15p-5,
69*71db0c75SAndroid Build Coastguard Worker      -0x1.0ef35f6f72e52p-5, 0x1.019c33b65e4ebp-6, -0x1.4d25bdd52d3a5p-8,
70*71db0c75SAndroid Build Coastguard Worker      0x1.a008ae91f5936p-11},
71*71db0c75SAndroid Build Coastguard Worker     {0x1.553e878eafee1p-2, -0x1.c3a1d0b2a3db2p-4, 0x1.db90d8ed9f89bp-5,
72*71db0c75SAndroid Build Coastguard Worker      -0x1.0490e20f1ae91p-5, 0x1.d9a5d1fc42fe3p-7, -0x1.20bf8227c2abfp-8,
73*71db0c75SAndroid Build Coastguard Worker      0x1.50f8174cdb6e9p-11},
74*71db0c75SAndroid Build Coastguard Worker     {0x1.5535a0dedf1b1p-2, -0x1.c29afb8bd01a1p-4, 0x1.d53f6371c1e27p-5,
75*71db0c75SAndroid Build Coastguard Worker      -0x1.f463209b433e2p-6, 0x1.b35222a17e44p-7, -0x1.f5efbf505e133p-9,
76*71db0c75SAndroid Build Coastguard Worker      0x1.12e0e94e8586dp-11},
77*71db0c75SAndroid Build Coastguard Worker     {0x1.552aa25e57bfdp-2, -0x1.c16d811e4acadp-4, 0x1.ce8489b47aa51p-5,
78*71db0c75SAndroid Build Coastguard Worker      -0x1.dfde7ff758ea8p-6, 0x1.901f43aac38c8p-7, -0x1.b581d07df5ad5p-9,
79*71db0c75SAndroid Build Coastguard Worker      0x1.c3726535f1fc6p-12},
80*71db0c75SAndroid Build Coastguard Worker     {0x1.551d5d9b204d3p-2, -0x1.c019e328f8db1p-4, 0x1.c7710f44fc3cep-5,
81*71db0c75SAndroid Build Coastguard Worker      -0x1.cbbbe25ea8ba4p-6, 0x1.6fe270088623dp-7, -0x1.7e6fc79733761p-9,
82*71db0c75SAndroid Build Coastguard Worker      0x1.75077abf18d84p-12},
83*71db0c75SAndroid Build Coastguard Worker };
84*71db0c75SAndroid Build Coastguard Worker 
85*71db0c75SAndroid Build Coastguard Worker } // anonymous namespace
86*71db0c75SAndroid Build Coastguard Worker 
87*71db0c75SAndroid Build Coastguard Worker LLVM_LIBC_FUNCTION(float, cbrtf, (float x)) {
88*71db0c75SAndroid Build Coastguard Worker   using FloatBits = typename fputil::FPBits<float>;
89*71db0c75SAndroid Build Coastguard Worker   using DoubleBits = typename fputil::FPBits<double>;
90*71db0c75SAndroid Build Coastguard Worker 
91*71db0c75SAndroid Build Coastguard Worker   FloatBits x_bits(x);
92*71db0c75SAndroid Build Coastguard Worker 
93*71db0c75SAndroid Build Coastguard Worker   uint32_t x_abs = x_bits.uintval() & 0x7fff'ffff;
94*71db0c75SAndroid Build Coastguard Worker   uint32_t sign_bit = (x_bits.uintval() >> 31) << DoubleBits::EXP_LEN;
95*71db0c75SAndroid Build Coastguard Worker 
96*71db0c75SAndroid Build Coastguard Worker   if (LIBC_UNLIKELY(x == 0.0f || x_abs >= 0x7f80'0000)) {
97*71db0c75SAndroid Build Coastguard Worker     // x is 0, Inf, or NaN.
98*71db0c75SAndroid Build Coastguard Worker     // Make sure it works for FTZ/DAZ modes.
99*71db0c75SAndroid Build Coastguard Worker     return x + x;
100*71db0c75SAndroid Build Coastguard Worker   }
101*71db0c75SAndroid Build Coastguard Worker 
102*71db0c75SAndroid Build Coastguard Worker   double xd = static_cast<double>(x);
103*71db0c75SAndroid Build Coastguard Worker   DoubleBits xd_bits(xd);
104*71db0c75SAndroid Build Coastguard Worker 
105*71db0c75SAndroid Build Coastguard Worker   // When using biased exponent of x in double precision,
106*71db0c75SAndroid Build Coastguard Worker   //   x_e = real_exponent_of_x + 1023
107*71db0c75SAndroid Build Coastguard Worker   // Then:
108*71db0c75SAndroid Build Coastguard Worker   //   x_e / 3 = real_exponent_of_x / 3 + 1023/3
109*71db0c75SAndroid Build Coastguard Worker   //           = real_exponent_of_x / 3 + 341
110*71db0c75SAndroid Build Coastguard Worker   // So to make it the correct biased exponent of x^(1/3), we add
111*71db0c75SAndroid Build Coastguard Worker   //   1023 - 341 = 682
112*71db0c75SAndroid Build Coastguard Worker   // to the quotient x_e / 3.
113*71db0c75SAndroid Build Coastguard Worker   unsigned x_e = static_cast<unsigned>(xd_bits.get_biased_exponent());
114*71db0c75SAndroid Build Coastguard Worker   unsigned out_e = (x_e / 3 + 682) | sign_bit;
115*71db0c75SAndroid Build Coastguard Worker   unsigned shift_e = x_e % 3;
116*71db0c75SAndroid Build Coastguard Worker 
117*71db0c75SAndroid Build Coastguard Worker   // Set x_m = 2^(x_e % 3) * (1.mantissa)
118*71db0c75SAndroid Build Coastguard Worker   uint64_t x_m = xd_bits.get_mantissa();
119*71db0c75SAndroid Build Coastguard Worker   // Use the leading 4 bits for look up table
120*71db0c75SAndroid Build Coastguard Worker   unsigned idx = static_cast<unsigned>(x_m >> (DoubleBits::FRACTION_LEN - 4));
121*71db0c75SAndroid Build Coastguard Worker 
122*71db0c75SAndroid Build Coastguard Worker   x_m |= static_cast<uint64_t>(DoubleBits::EXP_BIAS)
123*71db0c75SAndroid Build Coastguard Worker          << DoubleBits::FRACTION_LEN;
124*71db0c75SAndroid Build Coastguard Worker 
125*71db0c75SAndroid Build Coastguard Worker   double x_reduced = DoubleBits(x_m).get_val();
126*71db0c75SAndroid Build Coastguard Worker   double dx = x_reduced - 1.0;
127*71db0c75SAndroid Build Coastguard Worker 
128*71db0c75SAndroid Build Coastguard Worker   double dx_sq = dx * dx;
129*71db0c75SAndroid Build Coastguard Worker   double c0 = fputil::multiply_add(dx, COEFFS[idx][0], 1.0);
130*71db0c75SAndroid Build Coastguard Worker   double c1 = fputil::multiply_add(dx, COEFFS[idx][2], COEFFS[idx][1]);
131*71db0c75SAndroid Build Coastguard Worker   double c2 = fputil::multiply_add(dx, COEFFS[idx][4], COEFFS[idx][3]);
132*71db0c75SAndroid Build Coastguard Worker   double c3 = fputil::multiply_add(dx, COEFFS[idx][6], COEFFS[idx][5]);
133*71db0c75SAndroid Build Coastguard Worker 
134*71db0c75SAndroid Build Coastguard Worker   double dx_4 = dx_sq * dx_sq;
135*71db0c75SAndroid Build Coastguard Worker   double p0 = fputil::multiply_add(dx_sq, c1, c0);
136*71db0c75SAndroid Build Coastguard Worker   double p1 = fputil::multiply_add(dx_sq, c3, c2);
137*71db0c75SAndroid Build Coastguard Worker 
138*71db0c75SAndroid Build Coastguard Worker   double r = fputil::multiply_add(dx_4, p1, p0) * CBRT2[shift_e];
139*71db0c75SAndroid Build Coastguard Worker 
140*71db0c75SAndroid Build Coastguard Worker   uint64_t r_m = DoubleBits(r).get_mantissa();
141*71db0c75SAndroid Build Coastguard Worker   // Check if the output is exact.  To be exact, the smallest 1-bit of the
142*71db0c75SAndroid Build Coastguard Worker   // output has to be at least 2^-7 or higher.  So we check the lowest 44 bits
143*71db0c75SAndroid Build Coastguard Worker   // to see if they are within 2^(-52 + 3) errors from all zeros, then the
144*71db0c75SAndroid Build Coastguard Worker   // result cube root is exact.
145*71db0c75SAndroid Build Coastguard Worker   if (LIBC_UNLIKELY(((r_m + 8) & 0xfffffffffff) <= 16)) {
146*71db0c75SAndroid Build Coastguard Worker     if ((r_m & 0xfffffffffff) <= 8)
147*71db0c75SAndroid Build Coastguard Worker       r_m &= 0xffff'ffff'ffff'ffe0;
148*71db0c75SAndroid Build Coastguard Worker     else
149*71db0c75SAndroid Build Coastguard Worker       r_m = (r_m & 0xffff'ffff'ffff'ffe0) + 0x20;
150*71db0c75SAndroid Build Coastguard Worker     fputil::clear_except_if_required(FE_INEXACT);
151*71db0c75SAndroid Build Coastguard Worker   }
152*71db0c75SAndroid Build Coastguard Worker   // Adjust exponent and sign.
153*71db0c75SAndroid Build Coastguard Worker   uint64_t r_bits =
154*71db0c75SAndroid Build Coastguard Worker       r_m | (static_cast<uint64_t>(out_e) << DoubleBits::FRACTION_LEN);
155*71db0c75SAndroid Build Coastguard Worker 
156*71db0c75SAndroid Build Coastguard Worker   return static_cast<float>(DoubleBits(r_bits).get_val());
157*71db0c75SAndroid Build Coastguard Worker }
158*71db0c75SAndroid Build Coastguard Worker 
159*71db0c75SAndroid Build Coastguard Worker } // namespace LIBC_NAMESPACE_DECL
160