1*412f47f9SXin Li# -*- julia -*- 2*412f47f9SXin Li# 3*412f47f9SXin Li# Generate tgamma128.h, containing polynomials and constants used by 4*412f47f9SXin Li# tgamma128.c. 5*412f47f9SXin Li# 6*412f47f9SXin Li# Copyright (c) 2006-2023, Arm Limited. 7*412f47f9SXin Li# SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 8*412f47f9SXin Li 9*412f47f9SXin Li# This Julia program depends on the 'Remez' and 'SpecialFunctions' 10*412f47f9SXin Li# library packages. To install them, run this at the interactive Julia 11*412f47f9SXin Li# prompt: 12*412f47f9SXin Li# 13*412f47f9SXin Li# import Pkg; Pkg.add(["Remez", "SpecialFunctions"]) 14*412f47f9SXin Li# 15*412f47f9SXin Li# Tested on Julia 1.4.1 (Ubuntu 20.04) and 1.9.0 (22.04). 16*412f47f9SXin Li 17*412f47f9SXin Liimport Printf 18*412f47f9SXin Liimport Remez 19*412f47f9SXin Liimport SpecialFunctions 20*412f47f9SXin Li 21*412f47f9SXin Li# Round a BigFloat to 128-bit long double and format it as a C99 hex 22*412f47f9SXin Li# float literal. 23*412f47f9SXin Lifunction quadhex(x) 24*412f47f9SXin Li sign = " " 25*412f47f9SXin Li if x < 0 26*412f47f9SXin Li sign = "-" 27*412f47f9SXin Li x = -x 28*412f47f9SXin Li end 29*412f47f9SXin Li 30*412f47f9SXin Li exponent = BigInt(floor(log2(x))) 31*412f47f9SXin Li exponent = max(exponent, -16382) 32*412f47f9SXin Li @assert(exponent <= 16383) # else overflow 33*412f47f9SXin Li 34*412f47f9SXin Li x /= BigFloat(2)^exponent 35*412f47f9SXin Li @assert(1 <= x < 2) 36*412f47f9SXin Li x *= BigFloat(2)^112 37*412f47f9SXin Li mantissa = BigInt(round(x)) 38*412f47f9SXin Li 39*412f47f9SXin Li mantstr = string(mantissa, base=16, pad=29) 40*412f47f9SXin Li return Printf.@sprintf("%s0x%s.%sp%+dL", sign, mantstr[1], mantstr[2:end], 41*412f47f9SXin Li exponent) 42*412f47f9SXin Liend 43*412f47f9SXin Li 44*412f47f9SXin Li# Round a BigFloat to 128-bit long double and return it still as a 45*412f47f9SXin Li# BigFloat. 46*412f47f9SXin Lifunction quadval(x, round=0) 47*412f47f9SXin Li sign = +1 48*412f47f9SXin Li if x.sign < 0 49*412f47f9SXin Li sign = -1 50*412f47f9SXin Li x = -x 51*412f47f9SXin Li end 52*412f47f9SXin Li 53*412f47f9SXin Li exponent = BigInt(floor(log2(x))) 54*412f47f9SXin Li exponent = max(exponent, -16382) 55*412f47f9SXin Li @assert(exponent <= 16383) # else overflow 56*412f47f9SXin Li 57*412f47f9SXin Li x /= BigFloat(2)^exponent 58*412f47f9SXin Li @assert(1 <= x < 2) 59*412f47f9SXin Li x *= BigFloat(2)^112 60*412f47f9SXin Li if round < 0 61*412f47f9SXin Li mantissa = floor(x) 62*412f47f9SXin Li elseif round > 0 63*412f47f9SXin Li mantissa = ceil(x) 64*412f47f9SXin Li else 65*412f47f9SXin Li mantissa = round(x) 66*412f47f9SXin Li end 67*412f47f9SXin Li 68*412f47f9SXin Li return sign * mantissa * BigFloat(2)^(exponent - 112) 69*412f47f9SXin Liend 70*412f47f9SXin Li 71*412f47f9SXin Li# Output an array of BigFloats as a C array declaration. 72*412f47f9SXin Lifunction dumparray(a, name) 73*412f47f9SXin Li println("static const long double ", name, "[] = {") 74*412f47f9SXin Li for x in N 75*412f47f9SXin Li println(" ", quadhex(x), ",") 76*412f47f9SXin Li end 77*412f47f9SXin Li println("};") 78*412f47f9SXin Liend 79*412f47f9SXin Li 80*412f47f9SXin Liprint("/* 81*412f47f9SXin Li * Polynomial coefficients and other constants for tgamma128.c. 82*412f47f9SXin Li * 83*412f47f9SXin Li * Copyright (c) 2006,2009,2023 Arm Limited. 84*412f47f9SXin Li * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 85*412f47f9SXin Li */ 86*412f47f9SXin Li") 87*412f47f9SXin Li 88*412f47f9SXin LiBase.MPFR.setprecision(512) 89*412f47f9SXin Li 90*412f47f9SXin Lie = exp(BigFloat(1)) 91*412f47f9SXin Li 92*412f47f9SXin Liprint(" 93*412f47f9SXin Li/* The largest positive value for which 128-bit tgamma does not overflow. */ 94*412f47f9SXin Li") 95*412f47f9SXin Lilo = BigFloat("1000") 96*412f47f9SXin Lihi = BigFloat("2000") 97*412f47f9SXin Liwhile true 98*412f47f9SXin Li global lo 99*412f47f9SXin Li global hi 100*412f47f9SXin Li global max_x 101*412f47f9SXin Li 102*412f47f9SXin Li mid = (lo + hi) / 2 103*412f47f9SXin Li if mid == lo || mid == hi 104*412f47f9SXin Li max_x = mid 105*412f47f9SXin Li break 106*412f47f9SXin Li end 107*412f47f9SXin Li if SpecialFunctions.logabsgamma(mid)[1] < 16384 * log(BigFloat(2)) 108*412f47f9SXin Li lo = mid 109*412f47f9SXin Li else 110*412f47f9SXin Li hi = mid 111*412f47f9SXin Li end 112*412f47f9SXin Liend 113*412f47f9SXin Limax_x = quadval(max_x, -1) 114*412f47f9SXin Liprintln("static const long double max_x = ", quadhex(max_x), ";") 115*412f47f9SXin Li 116*412f47f9SXin Liprint(" 117*412f47f9SXin Li/* Coefficients of the polynomial used in the tgamma_large() subroutine */ 118*412f47f9SXin Li") 119*412f47f9SXin LiN, D, E, X = Remez.ratfn_minimax( 120*412f47f9SXin Li x -> x==0 ? sqrt(BigFloat(2)*pi/e) : 121*412f47f9SXin Li exp(SpecialFunctions.logabsgamma(1/x)[1] + 122*412f47f9SXin Li (1/x-0.5)*(1+log(x))), 123*412f47f9SXin Li (0, 1/BigFloat(8)), 124*412f47f9SXin Li 24, 0, 125*412f47f9SXin Li (x, y) -> 1/y 126*412f47f9SXin Li) 127*412f47f9SXin Lidumparray(N, "coeffs_large") 128*412f47f9SXin Li 129*412f47f9SXin Liprint(" 130*412f47f9SXin Li/* Coefficients of the polynomial used in the tgamma_tiny() subroutine */ 131*412f47f9SXin Li") 132*412f47f9SXin LiN, D, E, X = Remez.ratfn_minimax( 133*412f47f9SXin Li x -> x==0 ? 1 : 1/(x*SpecialFunctions.gamma(x)), 134*412f47f9SXin Li (0, 1/BigFloat(32)), 135*412f47f9SXin Li 13, 0, 136*412f47f9SXin Li) 137*412f47f9SXin Lidumparray(N, "coeffs_tiny") 138*412f47f9SXin Li 139*412f47f9SXin Liprint(" 140*412f47f9SXin Li/* The location within the interval [1,2] where gamma has a minimum. 141*412f47f9SXin Li * Specified as the sum of two 128-bit values, for extra precision. */ 142*412f47f9SXin Li") 143*412f47f9SXin Lilo = BigFloat("1.4") 144*412f47f9SXin Lihi = BigFloat("1.5") 145*412f47f9SXin Liwhile true 146*412f47f9SXin Li global lo 147*412f47f9SXin Li global hi 148*412f47f9SXin Li global min_x 149*412f47f9SXin Li 150*412f47f9SXin Li mid = (lo + hi) / 2 151*412f47f9SXin Li if mid == lo || mid == hi 152*412f47f9SXin Li min_x = mid 153*412f47f9SXin Li break 154*412f47f9SXin Li end 155*412f47f9SXin Li if SpecialFunctions.digamma(mid) < 0 156*412f47f9SXin Li lo = mid 157*412f47f9SXin Li else 158*412f47f9SXin Li hi = mid 159*412f47f9SXin Li end 160*412f47f9SXin Liend 161*412f47f9SXin Limin_x_hi = quadval(min_x, -1) 162*412f47f9SXin Liprintln("static const long double min_x_hi = ", quadhex(min_x_hi), ";") 163*412f47f9SXin Liprintln("static const long double min_x_lo = ", quadhex(min_x - min_x_hi), ";") 164*412f47f9SXin Li 165*412f47f9SXin Liprint(" 166*412f47f9SXin Li/* The actual minimum value that gamma takes at that location. 167*412f47f9SXin Li * Again specified as the sum of two 128-bit values. */ 168*412f47f9SXin Li") 169*412f47f9SXin Limin_y = SpecialFunctions.gamma(min_x) 170*412f47f9SXin Limin_y_hi = quadval(min_y, -1) 171*412f47f9SXin Liprintln("static const long double min_y_hi = ", quadhex(min_y_hi), ";") 172*412f47f9SXin Liprintln("static const long double min_y_lo = ", quadhex(min_y - min_y_hi), ";") 173*412f47f9SXin Li 174*412f47f9SXin Lifunction taylor_bodge(x) 175*412f47f9SXin Li # Taylor series generated by Wolfram Alpha for (gamma(min_x+x)-min_y)/x^2. 176*412f47f9SXin Li # Used in the Remez calls below for x values very near the origin, to avoid 177*412f47f9SXin Li # significance loss problems when trying to compute it directly via that 178*412f47f9SXin Li # formula (even in MPFR's extra precision). 179*412f47f9SXin Li return BigFloat("0.428486815855585429730209907810650582960483696962660010556335457558784421896667728014324097132413696263704801646004585959298743677879606168187061990204432200")+x*(-BigFloat("0.130704158939785761928008749242671025181542078105370084716141350308119418619652583986015464395882363802104154017741656168641240436089858504560718773026275797")+x*(BigFloat("0.160890753325112844190519489594363387594505844658437718135952967735294789599989664428071656484587979507034160383271974554122934842441540146372016567834062876")+x*(-BigFloat("0.092277030213334350126864106458600575084335085690780082222880945224248438672595248111704471182201673989215223667543694847795410779036800385804729955729659506")))) 180*412f47f9SXin Liend 181*412f47f9SXin Li 182*412f47f9SXin Liprint(" 183*412f47f9SXin Li/* Coefficients of the polynomial used in the tgamma_central() subroutine 184*412f47f9SXin Li * for computing gamma on the interval [1,min_x] */ 185*412f47f9SXin Li") 186*412f47f9SXin LiN, D, E, X = Remez.ratfn_minimax( 187*412f47f9SXin Li x -> x < BigFloat(0x1p-64) ? taylor_bodge(-x) : 188*412f47f9SXin Li (SpecialFunctions.gamma(min_x - x) - min_y) / (x*x), 189*412f47f9SXin Li (0, min_x - 1), 190*412f47f9SXin Li 31, 0, 191*412f47f9SXin Li (x, y) -> x^2, 192*412f47f9SXin Li) 193*412f47f9SXin Lidumparray(N, "coeffs_central_neg") 194*412f47f9SXin Li 195*412f47f9SXin Liprint(" 196*412f47f9SXin Li/* Coefficients of the polynomial used in the tgamma_central() subroutine 197*412f47f9SXin Li * for computing gamma on the interval [min_x,2] */ 198*412f47f9SXin Li") 199*412f47f9SXin LiN, D, E, X = Remez.ratfn_minimax( 200*412f47f9SXin Li x -> x < BigFloat(0x1p-64) ? taylor_bodge(x) : 201*412f47f9SXin Li (SpecialFunctions.gamma(min_x + x) - min_y) / (x*x), 202*412f47f9SXin Li (0, 2 - min_x), 203*412f47f9SXin Li 28, 0, 204*412f47f9SXin Li (x, y) -> x^2, 205*412f47f9SXin Li) 206*412f47f9SXin Lidumparray(N, "coeffs_central_pos") 207*412f47f9SXin Li 208*412f47f9SXin Liprint(" 209*412f47f9SXin Li/* 128-bit float value of pi, used by the sin_pi_x_over_pi subroutine 210*412f47f9SXin Li */ 211*412f47f9SXin Li") 212*412f47f9SXin Liprintln("static const long double pi = ", quadhex(BigFloat(pi)), ";") 213