xref: /aosp_15_r20/external/arm-optimized-routines/math/tools/tgamma128_gen.jl (revision 412f47f9e737e10ed5cc46ec6a8d7fa2264f8a14)
1*412f47f9SXin Li# -*- julia -*-
2*412f47f9SXin Li#
3*412f47f9SXin Li# Generate tgamma128.h, containing polynomials and constants used by
4*412f47f9SXin Li# tgamma128.c.
5*412f47f9SXin Li#
6*412f47f9SXin Li# Copyright (c) 2006-2023, Arm Limited.
7*412f47f9SXin Li# SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
8*412f47f9SXin Li
9*412f47f9SXin Li# This Julia program depends on the 'Remez' and 'SpecialFunctions'
10*412f47f9SXin Li# library packages. To install them, run this at the interactive Julia
11*412f47f9SXin Li# prompt:
12*412f47f9SXin Li#
13*412f47f9SXin Li#   import Pkg; Pkg.add(["Remez", "SpecialFunctions"])
14*412f47f9SXin Li#
15*412f47f9SXin Li# Tested on Julia 1.4.1 (Ubuntu 20.04) and 1.9.0 (22.04).
16*412f47f9SXin Li
17*412f47f9SXin Liimport Printf
18*412f47f9SXin Liimport Remez
19*412f47f9SXin Liimport SpecialFunctions
20*412f47f9SXin Li
21*412f47f9SXin Li# Round a BigFloat to 128-bit long double and format it as a C99 hex
22*412f47f9SXin Li# float literal.
23*412f47f9SXin Lifunction quadhex(x)
24*412f47f9SXin Li    sign = " "
25*412f47f9SXin Li    if x < 0
26*412f47f9SXin Li        sign = "-"
27*412f47f9SXin Li        x = -x
28*412f47f9SXin Li    end
29*412f47f9SXin Li
30*412f47f9SXin Li    exponent = BigInt(floor(log2(x)))
31*412f47f9SXin Li    exponent = max(exponent, -16382)
32*412f47f9SXin Li    @assert(exponent <= 16383) # else overflow
33*412f47f9SXin Li
34*412f47f9SXin Li    x /= BigFloat(2)^exponent
35*412f47f9SXin Li    @assert(1 <= x < 2)
36*412f47f9SXin Li    x *= BigFloat(2)^112
37*412f47f9SXin Li    mantissa = BigInt(round(x))
38*412f47f9SXin Li
39*412f47f9SXin Li    mantstr = string(mantissa, base=16, pad=29)
40*412f47f9SXin Li    return Printf.@sprintf("%s0x%s.%sp%+dL", sign, mantstr[1], mantstr[2:end],
41*412f47f9SXin Li                           exponent)
42*412f47f9SXin Liend
43*412f47f9SXin Li
44*412f47f9SXin Li# Round a BigFloat to 128-bit long double and return it still as a
45*412f47f9SXin Li# BigFloat.
46*412f47f9SXin Lifunction quadval(x, round=0)
47*412f47f9SXin Li    sign = +1
48*412f47f9SXin Li    if x.sign < 0
49*412f47f9SXin Li        sign = -1
50*412f47f9SXin Li        x = -x
51*412f47f9SXin Li    end
52*412f47f9SXin Li
53*412f47f9SXin Li    exponent = BigInt(floor(log2(x)))
54*412f47f9SXin Li    exponent = max(exponent, -16382)
55*412f47f9SXin Li    @assert(exponent <= 16383) # else overflow
56*412f47f9SXin Li
57*412f47f9SXin Li    x /= BigFloat(2)^exponent
58*412f47f9SXin Li    @assert(1 <= x < 2)
59*412f47f9SXin Li    x *= BigFloat(2)^112
60*412f47f9SXin Li    if round < 0
61*412f47f9SXin Li        mantissa = floor(x)
62*412f47f9SXin Li    elseif round > 0
63*412f47f9SXin Li        mantissa = ceil(x)
64*412f47f9SXin Li    else
65*412f47f9SXin Li        mantissa = round(x)
66*412f47f9SXin Li    end
67*412f47f9SXin Li
68*412f47f9SXin Li    return sign * mantissa * BigFloat(2)^(exponent - 112)
69*412f47f9SXin Liend
70*412f47f9SXin Li
71*412f47f9SXin Li# Output an array of BigFloats as a C array declaration.
72*412f47f9SXin Lifunction dumparray(a, name)
73*412f47f9SXin Li    println("static const long double ", name, "[] = {")
74*412f47f9SXin Li    for x in N
75*412f47f9SXin Li        println("    ", quadhex(x), ",")
76*412f47f9SXin Li    end
77*412f47f9SXin Li    println("};")
78*412f47f9SXin Liend
79*412f47f9SXin Li
80*412f47f9SXin Liprint("/*
81*412f47f9SXin Li * Polynomial coefficients and other constants for tgamma128.c.
82*412f47f9SXin Li *
83*412f47f9SXin Li * Copyright (c) 2006,2009,2023 Arm Limited.
84*412f47f9SXin Li * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
85*412f47f9SXin Li */
86*412f47f9SXin Li")
87*412f47f9SXin Li
88*412f47f9SXin LiBase.MPFR.setprecision(512)
89*412f47f9SXin Li
90*412f47f9SXin Lie = exp(BigFloat(1))
91*412f47f9SXin Li
92*412f47f9SXin Liprint("
93*412f47f9SXin Li/* The largest positive value for which 128-bit tgamma does not overflow. */
94*412f47f9SXin Li")
95*412f47f9SXin Lilo = BigFloat("1000")
96*412f47f9SXin Lihi = BigFloat("2000")
97*412f47f9SXin Liwhile true
98*412f47f9SXin Li    global lo
99*412f47f9SXin Li    global hi
100*412f47f9SXin Li    global max_x
101*412f47f9SXin Li
102*412f47f9SXin Li    mid = (lo + hi) / 2
103*412f47f9SXin Li    if mid == lo || mid == hi
104*412f47f9SXin Li        max_x = mid
105*412f47f9SXin Li        break
106*412f47f9SXin Li    end
107*412f47f9SXin Li    if SpecialFunctions.logabsgamma(mid)[1] < 16384 * log(BigFloat(2))
108*412f47f9SXin Li        lo = mid
109*412f47f9SXin Li    else
110*412f47f9SXin Li        hi = mid
111*412f47f9SXin Li    end
112*412f47f9SXin Liend
113*412f47f9SXin Limax_x = quadval(max_x, -1)
114*412f47f9SXin Liprintln("static const long double max_x = ", quadhex(max_x), ";")
115*412f47f9SXin Li
116*412f47f9SXin Liprint("
117*412f47f9SXin Li/* Coefficients of the polynomial used in the tgamma_large() subroutine */
118*412f47f9SXin Li")
119*412f47f9SXin LiN, D, E, X = Remez.ratfn_minimax(
120*412f47f9SXin Li    x -> x==0 ? sqrt(BigFloat(2)*pi/e) :
121*412f47f9SXin Li                exp(SpecialFunctions.logabsgamma(1/x)[1] +
122*412f47f9SXin Li                    (1/x-0.5)*(1+log(x))),
123*412f47f9SXin Li    (0, 1/BigFloat(8)),
124*412f47f9SXin Li    24, 0,
125*412f47f9SXin Li    (x, y) -> 1/y
126*412f47f9SXin Li)
127*412f47f9SXin Lidumparray(N, "coeffs_large")
128*412f47f9SXin Li
129*412f47f9SXin Liprint("
130*412f47f9SXin Li/* Coefficients of the polynomial used in the tgamma_tiny() subroutine */
131*412f47f9SXin Li")
132*412f47f9SXin LiN, D, E, X = Remez.ratfn_minimax(
133*412f47f9SXin Li    x -> x==0 ? 1 : 1/(x*SpecialFunctions.gamma(x)),
134*412f47f9SXin Li    (0, 1/BigFloat(32)),
135*412f47f9SXin Li    13, 0,
136*412f47f9SXin Li)
137*412f47f9SXin Lidumparray(N, "coeffs_tiny")
138*412f47f9SXin Li
139*412f47f9SXin Liprint("
140*412f47f9SXin Li/* The location within the interval [1,2] where gamma has a minimum.
141*412f47f9SXin Li * Specified as the sum of two 128-bit values, for extra precision. */
142*412f47f9SXin Li")
143*412f47f9SXin Lilo = BigFloat("1.4")
144*412f47f9SXin Lihi = BigFloat("1.5")
145*412f47f9SXin Liwhile true
146*412f47f9SXin Li    global lo
147*412f47f9SXin Li    global hi
148*412f47f9SXin Li    global min_x
149*412f47f9SXin Li
150*412f47f9SXin Li    mid = (lo + hi) / 2
151*412f47f9SXin Li    if mid == lo || mid == hi
152*412f47f9SXin Li        min_x = mid
153*412f47f9SXin Li        break
154*412f47f9SXin Li    end
155*412f47f9SXin Li    if SpecialFunctions.digamma(mid) < 0
156*412f47f9SXin Li        lo = mid
157*412f47f9SXin Li    else
158*412f47f9SXin Li        hi = mid
159*412f47f9SXin Li    end
160*412f47f9SXin Liend
161*412f47f9SXin Limin_x_hi = quadval(min_x, -1)
162*412f47f9SXin Liprintln("static const long double min_x_hi = ", quadhex(min_x_hi), ";")
163*412f47f9SXin Liprintln("static const long double min_x_lo = ", quadhex(min_x - min_x_hi), ";")
164*412f47f9SXin Li
165*412f47f9SXin Liprint("
166*412f47f9SXin Li/* The actual minimum value that gamma takes at that location.
167*412f47f9SXin Li * Again specified as the sum of two 128-bit values. */
168*412f47f9SXin Li")
169*412f47f9SXin Limin_y = SpecialFunctions.gamma(min_x)
170*412f47f9SXin Limin_y_hi = quadval(min_y, -1)
171*412f47f9SXin Liprintln("static const long double min_y_hi = ", quadhex(min_y_hi), ";")
172*412f47f9SXin Liprintln("static const long double min_y_lo = ", quadhex(min_y - min_y_hi), ";")
173*412f47f9SXin Li
174*412f47f9SXin Lifunction taylor_bodge(x)
175*412f47f9SXin Li    # Taylor series generated by Wolfram Alpha for (gamma(min_x+x)-min_y)/x^2.
176*412f47f9SXin Li    # Used in the Remez calls below for x values very near the origin, to avoid
177*412f47f9SXin Li    # significance loss problems when trying to compute it directly via that
178*412f47f9SXin Li    # formula (even in MPFR's extra precision).
179*412f47f9SXin Li    return BigFloat("0.428486815855585429730209907810650582960483696962660010556335457558784421896667728014324097132413696263704801646004585959298743677879606168187061990204432200")+x*(-BigFloat("0.130704158939785761928008749242671025181542078105370084716141350308119418619652583986015464395882363802104154017741656168641240436089858504560718773026275797")+x*(BigFloat("0.160890753325112844190519489594363387594505844658437718135952967735294789599989664428071656484587979507034160383271974554122934842441540146372016567834062876")+x*(-BigFloat("0.092277030213334350126864106458600575084335085690780082222880945224248438672595248111704471182201673989215223667543694847795410779036800385804729955729659506"))))
180*412f47f9SXin Liend
181*412f47f9SXin Li
182*412f47f9SXin Liprint("
183*412f47f9SXin Li/* Coefficients of the polynomial used in the tgamma_central() subroutine
184*412f47f9SXin Li * for computing gamma on the interval [1,min_x] */
185*412f47f9SXin Li")
186*412f47f9SXin LiN, D, E, X = Remez.ratfn_minimax(
187*412f47f9SXin Li    x -> x < BigFloat(0x1p-64) ? taylor_bodge(-x) :
188*412f47f9SXin Li        (SpecialFunctions.gamma(min_x - x) - min_y) / (x*x),
189*412f47f9SXin Li    (0, min_x - 1),
190*412f47f9SXin Li    31, 0,
191*412f47f9SXin Li    (x, y) -> x^2,
192*412f47f9SXin Li)
193*412f47f9SXin Lidumparray(N, "coeffs_central_neg")
194*412f47f9SXin Li
195*412f47f9SXin Liprint("
196*412f47f9SXin Li/* Coefficients of the polynomial used in the tgamma_central() subroutine
197*412f47f9SXin Li * for computing gamma on the interval [min_x,2] */
198*412f47f9SXin Li")
199*412f47f9SXin LiN, D, E, X = Remez.ratfn_minimax(
200*412f47f9SXin Li    x -> x < BigFloat(0x1p-64) ? taylor_bodge(x) :
201*412f47f9SXin Li        (SpecialFunctions.gamma(min_x + x) - min_y) / (x*x),
202*412f47f9SXin Li    (0, 2 - min_x),
203*412f47f9SXin Li    28, 0,
204*412f47f9SXin Li    (x, y) -> x^2,
205*412f47f9SXin Li)
206*412f47f9SXin Lidumparray(N, "coeffs_central_pos")
207*412f47f9SXin Li
208*412f47f9SXin Liprint("
209*412f47f9SXin Li/* 128-bit float value of pi, used by the sin_pi_x_over_pi subroutine
210*412f47f9SXin Li */
211*412f47f9SXin Li")
212*412f47f9SXin Liprintln("static const long double pi = ", quadhex(BigFloat(pi)), ";")
213