xref: /aosp_15_r20/external/arm-optimized-routines/math/tools/remez.jl (revision 412f47f9e737e10ed5cc46ec6a8d7fa2264f8a14)
1*412f47f9SXin Li#!/usr/bin/env julia
2*412f47f9SXin Li# -*- julia -*-
3*412f47f9SXin Li
4*412f47f9SXin Li# remez.jl - implementation of the Remez algorithm for polynomial approximation
5*412f47f9SXin Li#
6*412f47f9SXin Li# Copyright (c) 2015-2019, Arm Limited.
7*412f47f9SXin Li# SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
8*412f47f9SXin Li
9*412f47f9SXin Liimport Base.\
10*412f47f9SXin Li
11*412f47f9SXin Li# ----------------------------------------------------------------------
12*412f47f9SXin Li# Helper functions to cope with different Julia versions.
13*412f47f9SXin Liif VERSION >= v"0.7.0"
14*412f47f9SXin Li    array1d(T, d) = Array{T, 1}(undef, d)
15*412f47f9SXin Li    array2d(T, d1, d2) = Array{T, 2}(undef, d1, d2)
16*412f47f9SXin Lielse
17*412f47f9SXin Li    array1d(T, d) = Array(T, d)
18*412f47f9SXin Li    array2d(T, d1, d2) = Array(T, d1, d2)
19*412f47f9SXin Liend
20*412f47f9SXin Liif VERSION < v"0.5.0"
21*412f47f9SXin Li    String = ASCIIString
22*412f47f9SXin Liend
23*412f47f9SXin Liif VERSION >= v"0.6.0"
24*412f47f9SXin Li    # Use Base.invokelatest to run functions made using eval(), to
25*412f47f9SXin Li    # avoid "world age" error
26*412f47f9SXin Li    run(f, x...) = Base.invokelatest(f, x...)
27*412f47f9SXin Lielse
28*412f47f9SXin Li    # Prior to 0.6.0, invokelatest doesn't exist (but fortunately the
29*412f47f9SXin Li    # world age problem also doesn't seem to exist)
30*412f47f9SXin Li    run(f, x...) = f(x...)
31*412f47f9SXin Liend
32*412f47f9SXin Li
33*412f47f9SXin Li# ----------------------------------------------------------------------
34*412f47f9SXin Li# Global variables configured by command-line options.
35*412f47f9SXin Lifloatsuffix = "" # adjusted by --floatsuffix
36*412f47f9SXin Lixvarname = "x" # adjusted by --variable
37*412f47f9SXin Liepsbits = 256 # adjusted by --bits
38*412f47f9SXin Lidebug_facilities = Set() # adjusted by --debug
39*412f47f9SXin Lifull_output = false # adjusted by --full
40*412f47f9SXin Liarray_format = false # adjusted by --array
41*412f47f9SXin Lipreliminary_commands = array1d(String, 0) # adjusted by --pre
42*412f47f9SXin Li
43*412f47f9SXin Li# ----------------------------------------------------------------------
44*412f47f9SXin Li# Diagnostic and utility functions.
45*412f47f9SXin Li
46*412f47f9SXin Li# Enable debugging printouts from a particular subpart of this
47*412f47f9SXin Li# program.
48*412f47f9SXin Li#
49*412f47f9SXin Li# Arguments:
50*412f47f9SXin Li#    facility   Name of the facility to debug. For a list of facility names,
51*412f47f9SXin Li#               look through the code for calls to debug().
52*412f47f9SXin Li#
53*412f47f9SXin Li# Return value is a BigFloat.
54*412f47f9SXin Lifunction enable_debug(facility)
55*412f47f9SXin Li    push!(debug_facilities, facility)
56*412f47f9SXin Liend
57*412f47f9SXin Li
58*412f47f9SXin Li# Print a diagnostic.
59*412f47f9SXin Li#
60*412f47f9SXin Li# Arguments:
61*412f47f9SXin Li#    facility   Name of the facility for which this is a debug message.
62*412f47f9SXin Li#    printargs  Arguments to println() if debugging of that facility is
63*412f47f9SXin Li#               enabled.
64*412f47f9SXin Limacro debug(facility, printargs...)
65*412f47f9SXin Li    printit = quote
66*412f47f9SXin Li        print("[", $facility, "] ")
67*412f47f9SXin Li    end
68*412f47f9SXin Li    for arg in printargs
69*412f47f9SXin Li        printit = quote
70*412f47f9SXin Li            $printit
71*412f47f9SXin Li            print($(esc(arg)))
72*412f47f9SXin Li        end
73*412f47f9SXin Li    end
74*412f47f9SXin Li    return quote
75*412f47f9SXin Li        if $facility in debug_facilities
76*412f47f9SXin Li            $printit
77*412f47f9SXin Li            println()
78*412f47f9SXin Li        end
79*412f47f9SXin Li    end
80*412f47f9SXin Liend
81*412f47f9SXin Li
82*412f47f9SXin Li# Evaluate a polynomial.
83*412f47f9SXin Li
84*412f47f9SXin Li# Arguments:
85*412f47f9SXin Li#    coeffs   Array of BigFloats giving the coefficients of the polynomial.
86*412f47f9SXin Li#             Starts with the constant term, i.e. coeffs[i] is the
87*412f47f9SXin Li#             coefficient of x^(i-1) (because Julia arrays are 1-based).
88*412f47f9SXin Li#    x        Point at which to evaluate the polynomial.
89*412f47f9SXin Li#
90*412f47f9SXin Li# Return value is a BigFloat.
91*412f47f9SXin Lifunction poly_eval(coeffs::Array{BigFloat}, x::BigFloat)
92*412f47f9SXin Li    n = length(coeffs)
93*412f47f9SXin Li    if n == 0
94*412f47f9SXin Li        return BigFloat(0)
95*412f47f9SXin Li    elseif n == 1
96*412f47f9SXin Li        return coeffs[1]
97*412f47f9SXin Li    else
98*412f47f9SXin Li        return coeffs[1] + x * poly_eval(coeffs[2:n], x)
99*412f47f9SXin Li    end
100*412f47f9SXin Liend
101*412f47f9SXin Li
102*412f47f9SXin Li# Evaluate a rational function.
103*412f47f9SXin Li
104*412f47f9SXin Li# Arguments:
105*412f47f9SXin Li#    ncoeffs  Array of BigFloats giving the coefficients of the numerator.
106*412f47f9SXin Li#             Starts with the constant term, and 1-based, as above.
107*412f47f9SXin Li#    dcoeffs  Array of BigFloats giving the coefficients of the denominator.
108*412f47f9SXin Li#             Starts with the constant term, and 1-based, as above.
109*412f47f9SXin Li#    x        Point at which to evaluate the function.
110*412f47f9SXin Li#
111*412f47f9SXin Li# Return value is a BigFloat.
112*412f47f9SXin Lifunction ratfn_eval(ncoeffs::Array{BigFloat}, dcoeffs::Array{BigFloat},
113*412f47f9SXin Li                    x::BigFloat)
114*412f47f9SXin Li    return poly_eval(ncoeffs, x) / poly_eval(dcoeffs, x)
115*412f47f9SXin Liend
116*412f47f9SXin Li
117*412f47f9SXin Li# Format a BigFloat into an appropriate output format.
118*412f47f9SXin Li# Arguments:
119*412f47f9SXin Li#    x        BigFloat to format.
120*412f47f9SXin Li#
121*412f47f9SXin Li# Return value is a string.
122*412f47f9SXin Lifunction float_to_str(x)
123*412f47f9SXin Li    return string(x) * floatsuffix
124*412f47f9SXin Liend
125*412f47f9SXin Li
126*412f47f9SXin Li# Format a polynomial into an arithmetic expression, for pasting into
127*412f47f9SXin Li# other tools such as gnuplot.
128*412f47f9SXin Li
129*412f47f9SXin Li# Arguments:
130*412f47f9SXin Li#    coeffs   Array of BigFloats giving the coefficients of the polynomial.
131*412f47f9SXin Li#             Starts with the constant term, and 1-based, as above.
132*412f47f9SXin Li#
133*412f47f9SXin Li# Return value is a string.
134*412f47f9SXin Lifunction poly_to_string(coeffs::Array{BigFloat})
135*412f47f9SXin Li    n = length(coeffs)
136*412f47f9SXin Li    if n == 0
137*412f47f9SXin Li        return "0"
138*412f47f9SXin Li    elseif n == 1
139*412f47f9SXin Li        return float_to_str(coeffs[1])
140*412f47f9SXin Li    else
141*412f47f9SXin Li        return string(float_to_str(coeffs[1]), "+", xvarname, "*(",
142*412f47f9SXin Li                      poly_to_string(coeffs[2:n]), ")")
143*412f47f9SXin Li    end
144*412f47f9SXin Liend
145*412f47f9SXin Li
146*412f47f9SXin Li# Format a rational function into a string.
147*412f47f9SXin Li
148*412f47f9SXin Li# Arguments:
149*412f47f9SXin Li#    ncoeffs  Array of BigFloats giving the coefficients of the numerator.
150*412f47f9SXin Li#             Starts with the constant term, and 1-based, as above.
151*412f47f9SXin Li#    dcoeffs  Array of BigFloats giving the coefficients of the denominator.
152*412f47f9SXin Li#             Starts with the constant term, and 1-based, as above.
153*412f47f9SXin Li#
154*412f47f9SXin Li# Return value is a string.
155*412f47f9SXin Lifunction ratfn_to_string(ncoeffs::Array{BigFloat}, dcoeffs::Array{BigFloat})
156*412f47f9SXin Li    if length(dcoeffs) == 1 && dcoeffs[1] == 1
157*412f47f9SXin Li        # Special case: if the denominator is just 1, leave it out.
158*412f47f9SXin Li        return poly_to_string(ncoeffs)
159*412f47f9SXin Li    else
160*412f47f9SXin Li        return string("(", poly_to_string(ncoeffs), ")/(",
161*412f47f9SXin Li                      poly_to_string(dcoeffs), ")")
162*412f47f9SXin Li    end
163*412f47f9SXin Liend
164*412f47f9SXin Li
165*412f47f9SXin Li# Format a list of x,y pairs into a string.
166*412f47f9SXin Li
167*412f47f9SXin Li# Arguments:
168*412f47f9SXin Li#    xys      Array of (x,y) pairs of BigFloats.
169*412f47f9SXin Li#
170*412f47f9SXin Li# Return value is a string.
171*412f47f9SXin Lifunction format_xylist(xys::Array{Tuple{BigFloat,BigFloat}})
172*412f47f9SXin Li    return ("[\n" *
173*412f47f9SXin Li            join(["  "*string(x)*" -> "*string(y) for (x,y) in xys], "\n") *
174*412f47f9SXin Li            "\n]")
175*412f47f9SXin Liend
176*412f47f9SXin Li
177*412f47f9SXin Li# ----------------------------------------------------------------------
178*412f47f9SXin Li# Matrix-equation solver for matrices of BigFloat.
179*412f47f9SXin Li#
180*412f47f9SXin Li# I had hoped that Julia's type-genericity would allow me to solve the
181*412f47f9SXin Li# matrix equation Mx=V by just writing 'M \ V'. Unfortunately, that
182*412f47f9SXin Li# works by translating the inputs into double precision and handing
183*412f47f9SXin Li# off to an optimised library, which misses the point when I have a
184*412f47f9SXin Li# matrix and vector of BigFloat and want my result in _better_ than
185*412f47f9SXin Li# double precision. So I have to implement my own specialisation of
186*412f47f9SXin Li# the \ operator for that case.
187*412f47f9SXin Li#
188*412f47f9SXin Li# Fortunately, the point of using BigFloats is that we have precision
189*412f47f9SXin Li# to burn, so I can do completely naïve Gaussian elimination without
190*412f47f9SXin Li# worrying about instability.
191*412f47f9SXin Li
192*412f47f9SXin Li# Arguments:
193*412f47f9SXin Li#    matrix_in    2-dimensional array of BigFloats, representing a matrix M
194*412f47f9SXin Li#                 in row-first order, i.e. matrix_in[r,c] represents the
195*412f47f9SXin Li#                 entry in row r col c.
196*412f47f9SXin Li#    vector_in    1-dimensional array of BigFloats, representing a vector V.
197*412f47f9SXin Li#
198*412f47f9SXin Li# Return value: a 1-dimensional array X of BigFloats, satisfying M X = V.
199*412f47f9SXin Li#
200*412f47f9SXin Li# Expects the input to be an invertible square matrix and a vector of
201*412f47f9SXin Li# the corresponding size, on pain of failing an assertion.
202*412f47f9SXin Lifunction \(matrix_in :: Array{BigFloat,2},
203*412f47f9SXin Li           vector_in :: Array{BigFloat,1})
204*412f47f9SXin Li    # Copy the inputs, because we'll be mutating them as we go.
205*412f47f9SXin Li    M = copy(matrix_in)
206*412f47f9SXin Li    V = copy(vector_in)
207*412f47f9SXin Li
208*412f47f9SXin Li    # Input consistency criteria: matrix is square, and vector has
209*412f47f9SXin Li    # length to match.
210*412f47f9SXin Li    n = length(V)
211*412f47f9SXin Li    @assert(n > 0)
212*412f47f9SXin Li    @assert(size(M) == (n,n))
213*412f47f9SXin Li
214*412f47f9SXin Li    @debug("gausselim", "starting, n=", n)
215*412f47f9SXin Li
216*412f47f9SXin Li    for i = 1:1:n
217*412f47f9SXin Li        # Straightforward Gaussian elimination: find the largest
218*412f47f9SXin Li        # non-zero entry in column i (and in a row we haven't sorted
219*412f47f9SXin Li        # out already), swap it into row i, scale that row to
220*412f47f9SXin Li        # normalise it to 1, then zero out the rest of the column by
221*412f47f9SXin Li        # subtracting a multiple of that row from each other row.
222*412f47f9SXin Li
223*412f47f9SXin Li        @debug("gausselim", "matrix=", repr(M))
224*412f47f9SXin Li        @debug("gausselim", "vector=", repr(V))
225*412f47f9SXin Li
226*412f47f9SXin Li        # Find the best pivot.
227*412f47f9SXin Li        bestrow = 0
228*412f47f9SXin Li        bestval = 0
229*412f47f9SXin Li        for j = i:1:n
230*412f47f9SXin Li            if abs(M[j,i]) > bestval
231*412f47f9SXin Li                bestrow = j
232*412f47f9SXin Li                bestval = M[j,i]
233*412f47f9SXin Li            end
234*412f47f9SXin Li        end
235*412f47f9SXin Li        @assert(bestrow > 0) # make sure we did actually find one
236*412f47f9SXin Li
237*412f47f9SXin Li        @debug("gausselim", "bestrow=", bestrow)
238*412f47f9SXin Li
239*412f47f9SXin Li        # Swap it into row i.
240*412f47f9SXin Li        if bestrow != i
241*412f47f9SXin Li            for k = 1:1:n
242*412f47f9SXin Li                M[bestrow,k],M[i,k] = M[i,k],M[bestrow,k]
243*412f47f9SXin Li            end
244*412f47f9SXin Li            V[bestrow],V[i] = V[i],V[bestrow]
245*412f47f9SXin Li        end
246*412f47f9SXin Li
247*412f47f9SXin Li        # Scale that row so that M[i,i] becomes 1.
248*412f47f9SXin Li        divisor = M[i,i]
249*412f47f9SXin Li        for k = 1:1:n
250*412f47f9SXin Li            M[i,k] = M[i,k] / divisor
251*412f47f9SXin Li        end
252*412f47f9SXin Li        V[i] = V[i] / divisor
253*412f47f9SXin Li        @assert(M[i,i] == 1)
254*412f47f9SXin Li
255*412f47f9SXin Li        # Zero out all other entries in column i, by subtracting
256*412f47f9SXin Li        # multiples of this row.
257*412f47f9SXin Li        for j = 1:1:n
258*412f47f9SXin Li            if j != i
259*412f47f9SXin Li                factor = M[j,i]
260*412f47f9SXin Li                for k = 1:1:n
261*412f47f9SXin Li                    M[j,k] = M[j,k] - M[i,k] * factor
262*412f47f9SXin Li                end
263*412f47f9SXin Li                V[j] = V[j] - V[i] * factor
264*412f47f9SXin Li                @assert(M[j,i] == 0)
265*412f47f9SXin Li            end
266*412f47f9SXin Li        end
267*412f47f9SXin Li    end
268*412f47f9SXin Li
269*412f47f9SXin Li    @debug("gausselim", "matrix=", repr(M))
270*412f47f9SXin Li    @debug("gausselim", "vector=", repr(V))
271*412f47f9SXin Li    @debug("gausselim", "done!")
272*412f47f9SXin Li
273*412f47f9SXin Li    # Now we're done: M is the identity matrix, so the equation Mx=V
274*412f47f9SXin Li    # becomes just x=V, i.e. V is already exactly the vector we want
275*412f47f9SXin Li    # to return.
276*412f47f9SXin Li    return V
277*412f47f9SXin Liend
278*412f47f9SXin Li
279*412f47f9SXin Li# ----------------------------------------------------------------------
280*412f47f9SXin Li# Least-squares fitting of a rational function to a set of (x,y)
281*412f47f9SXin Li# points.
282*412f47f9SXin Li#
283*412f47f9SXin Li# We use this to get an initial starting point for the Remez
284*412f47f9SXin Li# iteration. Therefore, it doesn't really need to be particularly
285*412f47f9SXin Li# accurate; it only needs to be good enough to wiggle back and forth
286*412f47f9SXin Li# across the target function the right number of times (so as to give
287*412f47f9SXin Li# enough error extrema to start optimising from) and not have any
288*412f47f9SXin Li# poles in the target interval.
289*412f47f9SXin Li#
290*412f47f9SXin Li# Least-squares fitting of a _polynomial_ is actually a sensible thing
291*412f47f9SXin Li# to do, and minimises the rms error. Doing the following trick with a
292*412f47f9SXin Li# rational function P/Q is less sensible, because it cannot be made to
293*412f47f9SXin Li# minimise the error function (P/Q-f)^2 that you actually wanted;
294*412f47f9SXin Li# instead it minimises (P-fQ)^2. But that should be good enough to
295*412f47f9SXin Li# have the properties described above.
296*412f47f9SXin Li#
297*412f47f9SXin Li# Some theory: suppose you're trying to choose a set of parameters a_i
298*412f47f9SXin Li# so as to minimise the sum of squares of some error function E_i.
299*412f47f9SXin Li# Basic calculus says, if you do this in one variable, just
300*412f47f9SXin Li# differentiate and solve for zero. In this case, that works fine even
301*412f47f9SXin Li# with multiple variables, because you _partially_ differentiate with
302*412f47f9SXin Li# respect to each a_i, giving a system of equations, and that system
303*412f47f9SXin Li# turns out to be linear so we just solve it as a matrix.
304*412f47f9SXin Li#
305*412f47f9SXin Li# In this case, our parameters are the coefficients of P and Q; to
306*412f47f9SXin Li# avoid underdetermining the system we'll fix Q's constant term at 1,
307*412f47f9SXin Li# so that our error function (as described above) is
308*412f47f9SXin Li#
309*412f47f9SXin Li# E = \sum (p_0 + p_1 x + ... + p_n x^n - y - y q_1 x - ... - y q_d x^d)^2
310*412f47f9SXin Li#
311*412f47f9SXin Li# where the sum is over all (x,y) coordinate pairs. Setting dE/dp_j=0
312*412f47f9SXin Li# (for each j) gives an equation of the form
313*412f47f9SXin Li#
314*412f47f9SXin Li# 0 = \sum 2(p_0 + p_1 x + ... + p_n x^n - y - y q_1 x - ... - y q_d x^d) x^j
315*412f47f9SXin Li#
316*412f47f9SXin Li# and setting dE/dq_j=0 gives one of the form
317*412f47f9SXin Li#
318*412f47f9SXin Li# 0 = \sum 2(p_0 + p_1 x + ... + p_n x^n - y - y q_1 x - ... - y q_d x^d) y x^j
319*412f47f9SXin Li#
320*412f47f9SXin Li# And both of those row types, treated as multivariate linear
321*412f47f9SXin Li# equations in the p,q values, have each coefficient being a value of
322*412f47f9SXin Li# the form \sum x^i, \sum y x^i or \sum y^2 x^i, for various i. (Times
323*412f47f9SXin Li# a factor of 2, but we can throw that away.) So we can go through the
324*412f47f9SXin Li# list of input coordinates summing all of those things, and then we
325*412f47f9SXin Li# have enough information to construct our matrix and solve it
326*412f47f9SXin Li# straight off for the rational function coefficients.
327*412f47f9SXin Li
328*412f47f9SXin Li# Arguments:
329*412f47f9SXin Li#    f        The function to be approximated. Maps BigFloat -> BigFloat.
330*412f47f9SXin Li#    xvals    Array of BigFloats, giving the list of x-coordinates at which
331*412f47f9SXin Li#             to evaluate f.
332*412f47f9SXin Li#    n        Degree of the numerator polynomial of the desired rational
333*412f47f9SXin Li#             function.
334*412f47f9SXin Li#    d        Degree of the denominator polynomial of the desired rational
335*412f47f9SXin Li#             function.
336*412f47f9SXin Li#    w        Error-weighting function. Takes two BigFloat arguments x,y
337*412f47f9SXin Li#             and returns a scaling factor for the error at that location.
338*412f47f9SXin Li#             A larger value indicates that the error should be given
339*412f47f9SXin Li#             greater weight in the square sum we try to minimise.
340*412f47f9SXin Li#             If unspecified, defaults to giving everything the same weight.
341*412f47f9SXin Li#
342*412f47f9SXin Li# Return values: a pair of arrays of BigFloats (N,D) giving the
343*412f47f9SXin Li# coefficients of the returned rational function. N has size n+1; D
344*412f47f9SXin Li# has size d+1. Both start with the constant term, i.e. N[i] is the
345*412f47f9SXin Li# coefficient of x^(i-1) (because Julia arrays are 1-based). D[1] will
346*412f47f9SXin Li# be 1.
347*412f47f9SXin Lifunction ratfn_leastsquares(f::Function, xvals::Array{BigFloat}, n, d,
348*412f47f9SXin Li                            w = (x,y)->BigFloat(1))
349*412f47f9SXin Li    # Accumulate sums of x^i y^j, for j={0,1,2} and a range of x.
350*412f47f9SXin Li    # Again because Julia arrays are 1-based, we'll have sums[i,j]
351*412f47f9SXin Li    # being the sum of x^(i-1) y^(j-1).
352*412f47f9SXin Li    maxpow = max(n,d) * 2 + 1
353*412f47f9SXin Li    sums = zeros(BigFloat, maxpow, 3)
354*412f47f9SXin Li    for x = xvals
355*412f47f9SXin Li        y = f(x)
356*412f47f9SXin Li        weight = w(x,y)
357*412f47f9SXin Li        for i = 1:1:maxpow
358*412f47f9SXin Li            for j = 1:1:3
359*412f47f9SXin Li                sums[i,j] += x^(i-1) * y^(j-1) * weight
360*412f47f9SXin Li            end
361*412f47f9SXin Li        end
362*412f47f9SXin Li    end
363*412f47f9SXin Li
364*412f47f9SXin Li    @debug("leastsquares", "sums=", repr(sums))
365*412f47f9SXin Li
366*412f47f9SXin Li    # Build the matrix. We're solving n+d+1 equations in n+d+1
367*412f47f9SXin Li    # unknowns. (We actually have to return n+d+2 coefficients, but
368*412f47f9SXin Li    # one of them is hardwired to 1.)
369*412f47f9SXin Li    matrix = array2d(BigFloat, n+d+1, n+d+1)
370*412f47f9SXin Li    vector = array1d(BigFloat, n+d+1)
371*412f47f9SXin Li    for i = 0:1:n
372*412f47f9SXin Li        # Equation obtained by differentiating with respect to p_i,
373*412f47f9SXin Li        # i.e. the numerator coefficient of x^i.
374*412f47f9SXin Li        row = 1+i
375*412f47f9SXin Li        for j = 0:1:n
376*412f47f9SXin Li            matrix[row, 1+j] = sums[1+i+j, 1]
377*412f47f9SXin Li        end
378*412f47f9SXin Li        for j = 1:1:d
379*412f47f9SXin Li            matrix[row, 1+n+j] = -sums[1+i+j, 2]
380*412f47f9SXin Li        end
381*412f47f9SXin Li        vector[row] = sums[1+i, 2]
382*412f47f9SXin Li    end
383*412f47f9SXin Li    for i = 1:1:d
384*412f47f9SXin Li        # Equation obtained by differentiating with respect to q_i,
385*412f47f9SXin Li        # i.e. the denominator coefficient of x^i.
386*412f47f9SXin Li        row = 1+n+i
387*412f47f9SXin Li        for j = 0:1:n
388*412f47f9SXin Li            matrix[row, 1+j] = sums[1+i+j, 2]
389*412f47f9SXin Li        end
390*412f47f9SXin Li        for j = 1:1:d
391*412f47f9SXin Li            matrix[row, 1+n+j] = -sums[1+i+j, 3]
392*412f47f9SXin Li        end
393*412f47f9SXin Li        vector[row] = sums[1+i, 3]
394*412f47f9SXin Li    end
395*412f47f9SXin Li
396*412f47f9SXin Li    @debug("leastsquares", "matrix=", repr(matrix))
397*412f47f9SXin Li    @debug("leastsquares", "vector=", repr(vector))
398*412f47f9SXin Li
399*412f47f9SXin Li    # Solve the matrix equation.
400*412f47f9SXin Li    all_coeffs = matrix \ vector
401*412f47f9SXin Li
402*412f47f9SXin Li    @debug("leastsquares", "all_coeffs=", repr(all_coeffs))
403*412f47f9SXin Li
404*412f47f9SXin Li    # And marshal the results into two separate polynomial vectors to
405*412f47f9SXin Li    # return.
406*412f47f9SXin Li    ncoeffs = all_coeffs[1:n+1]
407*412f47f9SXin Li    dcoeffs = vcat([1], all_coeffs[n+2:n+d+1])
408*412f47f9SXin Li    return (ncoeffs, dcoeffs)
409*412f47f9SXin Liend
410*412f47f9SXin Li
411*412f47f9SXin Li# ----------------------------------------------------------------------
412*412f47f9SXin Li# Golden-section search to find a maximum of a function.
413*412f47f9SXin Li
414*412f47f9SXin Li# Arguments:
415*412f47f9SXin Li#    f        Function to be maximised/minimised. Maps BigFloat -> BigFloat.
416*412f47f9SXin Li#    a,b,c    BigFloats bracketing a maximum of the function.
417*412f47f9SXin Li#
418*412f47f9SXin Li# Expects:
419*412f47f9SXin Li#    a,b,c are in order (either a<=b<=c or c<=b<=a)
420*412f47f9SXin Li#    a != c             (but b can equal one or the other if it wants to)
421*412f47f9SXin Li#    f(a) <= f(b) >= f(c)
422*412f47f9SXin Li#
423*412f47f9SXin Li# Return value is an (x,y) pair of BigFloats giving the extremal input
424*412f47f9SXin Li# and output. (That is, y=f(x).)
425*412f47f9SXin Lifunction goldensection(f::Function, a::BigFloat, b::BigFloat, c::BigFloat)
426*412f47f9SXin Li    # Decide on a 'good enough' threshold.
427*412f47f9SXin Li    threshold = abs(c-a) * 2^(-epsbits/2)
428*412f47f9SXin Li
429*412f47f9SXin Li    # We'll need the golden ratio phi, of course. Or rather, in this
430*412f47f9SXin Li    # case, we need 1/phi = 0.618...
431*412f47f9SXin Li    one_over_phi = 2 / (1 + sqrt(BigFloat(5)))
432*412f47f9SXin Li
433*412f47f9SXin Li    # Flip round the interval endpoints so that the interval [a,b] is
434*412f47f9SXin Li    # at least as large as [b,c]. (Then we can always pick our new
435*412f47f9SXin Li    # point in [a,b] without having to handle lots of special cases.)
436*412f47f9SXin Li    if abs(b-a) < abs(c-a)
437*412f47f9SXin Li        a,  c  = c,  a
438*412f47f9SXin Li    end
439*412f47f9SXin Li
440*412f47f9SXin Li    # Evaluate the function at the initial points.
441*412f47f9SXin Li    fa = f(a)
442*412f47f9SXin Li    fb = f(b)
443*412f47f9SXin Li    fc = f(c)
444*412f47f9SXin Li
445*412f47f9SXin Li    @debug("goldensection", "starting")
446*412f47f9SXin Li
447*412f47f9SXin Li    while abs(c-a) > threshold
448*412f47f9SXin Li        @debug("goldensection", "a: ", a, " -> ", fa)
449*412f47f9SXin Li        @debug("goldensection", "b: ", b, " -> ", fb)
450*412f47f9SXin Li        @debug("goldensection", "c: ", c, " -> ", fc)
451*412f47f9SXin Li
452*412f47f9SXin Li        # Check invariants.
453*412f47f9SXin Li        @assert(a <= b <= c || c <= b <= a)
454*412f47f9SXin Li        @assert(fa <= fb >= fc)
455*412f47f9SXin Li
456*412f47f9SXin Li        # Subdivide the larger of the intervals [a,b] and [b,c]. We've
457*412f47f9SXin Li        # arranged that this is always [a,b], for simplicity.
458*412f47f9SXin Li        d = a + (b-a) * one_over_phi
459*412f47f9SXin Li
460*412f47f9SXin Li        # Now we have an interval looking like this (possibly
461*412f47f9SXin Li        # reversed):
462*412f47f9SXin Li        #
463*412f47f9SXin Li        #    a            d       b            c
464*412f47f9SXin Li        #
465*412f47f9SXin Li        # and we know f(b) is bigger than either f(a) or f(c). We have
466*412f47f9SXin Li        # two cases: either f(d) > f(b), or vice versa. In either
467*412f47f9SXin Li        # case, we can narrow to an interval of 1/phi the size, and
468*412f47f9SXin Li        # still satisfy all our invariants (three ordered points,
469*412f47f9SXin Li        # [a,b] at least the width of [b,c], f(a)<=f(b)>=f(c)).
470*412f47f9SXin Li        fd = f(d)
471*412f47f9SXin Li        @debug("goldensection", "d: ", d, " -> ", fd)
472*412f47f9SXin Li        if fd > fb
473*412f47f9SXin Li            a,  b,  c  = a,  d,  b
474*412f47f9SXin Li            fa, fb, fc = fa, fd, fb
475*412f47f9SXin Li            @debug("goldensection", "adb case")
476*412f47f9SXin Li        else
477*412f47f9SXin Li            a,  b,  c  = c,  b,  d
478*412f47f9SXin Li            fa, fb, fc = fc, fb, fd
479*412f47f9SXin Li            @debug("goldensection", "cbd case")
480*412f47f9SXin Li        end
481*412f47f9SXin Li    end
482*412f47f9SXin Li
483*412f47f9SXin Li    @debug("goldensection", "done: ", b, " -> ", fb)
484*412f47f9SXin Li    return (b, fb)
485*412f47f9SXin Liend
486*412f47f9SXin Li
487*412f47f9SXin Li# ----------------------------------------------------------------------
488*412f47f9SXin Li# Find the extrema of a function within a given interval.
489*412f47f9SXin Li
490*412f47f9SXin Li# Arguments:
491*412f47f9SXin Li#    f         The function to be approximated. Maps BigFloat -> BigFloat.
492*412f47f9SXin Li#    grid      A set of points at which to evaluate f. Must be high enough
493*412f47f9SXin Li#              resolution to make extrema obvious.
494*412f47f9SXin Li#
495*412f47f9SXin Li# Returns an array of (x,y) pairs of BigFloats, with each x,y giving
496*412f47f9SXin Li# the extremum location and its value (i.e. y=f(x)).
497*412f47f9SXin Lifunction find_extrema(f::Function, grid::Array{BigFloat})
498*412f47f9SXin Li    len = length(grid)
499*412f47f9SXin Li    extrema = array1d(Tuple{BigFloat, BigFloat}, 0)
500*412f47f9SXin Li    for i = 1:1:len
501*412f47f9SXin Li        # We have to provide goldensection() with three points
502*412f47f9SXin Li        # bracketing the extremum. If the extremum is at one end of
503*412f47f9SXin Li        # the interval, then the only way we can do that is to set two
504*412f47f9SXin Li        # of the points equal (which goldensection() will cope with).
505*412f47f9SXin Li        prev = max(1, i-1)
506*412f47f9SXin Li        next = min(i+1, len)
507*412f47f9SXin Li
508*412f47f9SXin Li        # Find our three pairs of (x,y) coordinates.
509*412f47f9SXin Li        xp, xi, xn = grid[prev], grid[i], grid[next]
510*412f47f9SXin Li        yp, yi, yn = f(xp), f(xi), f(xn)
511*412f47f9SXin Li
512*412f47f9SXin Li        # See if they look like an extremum, and if so, ask
513*412f47f9SXin Li        # goldensection() to give a more exact location for it.
514*412f47f9SXin Li        if yp <= yi >= yn
515*412f47f9SXin Li            push!(extrema, goldensection(f, xp, xi, xn))
516*412f47f9SXin Li        elseif yp >= yi <= yn
517*412f47f9SXin Li            x, y = goldensection(x->-f(x), xp, xi, xn)
518*412f47f9SXin Li            push!(extrema, (x, -y))
519*412f47f9SXin Li        end
520*412f47f9SXin Li    end
521*412f47f9SXin Li    return extrema
522*412f47f9SXin Liend
523*412f47f9SXin Li
524*412f47f9SXin Li# ----------------------------------------------------------------------
525*412f47f9SXin Li# Winnow a list of a function's extrema to give a subsequence of a
526*412f47f9SXin Li# specified length, with the extrema in the subsequence alternating
527*412f47f9SXin Li# signs, and with the smallest absolute value of an extremum in the
528*412f47f9SXin Li# subsequence as large as possible.
529*412f47f9SXin Li#
530*412f47f9SXin Li# We do this using a dynamic-programming approach. We work along the
531*412f47f9SXin Li# provided array of extrema, and at all times, we track the best set
532*412f47f9SXin Li# of extrema we have so far seen for each possible (length, sign of
533*412f47f9SXin Li# last extremum) pair. Each new extremum is evaluated to see whether
534*412f47f9SXin Li# it can be added to any previously seen best subsequence to make a
535*412f47f9SXin Li# new subsequence that beats the previous record holder in its slot.
536*412f47f9SXin Li
537*412f47f9SXin Li# Arguments:
538*412f47f9SXin Li#    extrema   An array of (x,y) pairs of BigFloats giving the input extrema.
539*412f47f9SXin Li#    n         Number of extrema required as output.
540*412f47f9SXin Li#
541*412f47f9SXin Li# Returns a new array of (x,y) pairs which is a subsequence of the
542*412f47f9SXin Li# original sequence. (So, in particular, if the input was sorted by x
543*412f47f9SXin Li# then so will the output be.)
544*412f47f9SXin Lifunction winnow_extrema(extrema::Array{Tuple{BigFloat,BigFloat}}, n)
545*412f47f9SXin Li    # best[i,j] gives the best sequence so far of length i and with
546*412f47f9SXin Li    # sign j (where signs are coded as 1=positive, 2=negative), in the
547*412f47f9SXin Li    # form of a tuple (cost, actual array of x,y pairs).
548*412f47f9SXin Li    best = fill((BigFloat(0), array1d(Tuple{BigFloat,BigFloat}, 0)), n, 2)
549*412f47f9SXin Li
550*412f47f9SXin Li    for (x,y) = extrema
551*412f47f9SXin Li        if y > 0
552*412f47f9SXin Li            sign = 1
553*412f47f9SXin Li        elseif y < 0
554*412f47f9SXin Li            sign = 2
555*412f47f9SXin Li        else
556*412f47f9SXin Li            # A zero-valued extremum cannot possibly contribute to any
557*412f47f9SXin Li            # optimal sequence, so we simply ignore it!
558*412f47f9SXin Li            continue
559*412f47f9SXin Li        end
560*412f47f9SXin Li
561*412f47f9SXin Li        for i = 1:1:n
562*412f47f9SXin Li            # See if we can create a new entry for best[i,sign] by
563*412f47f9SXin Li            # appending our current (x,y) to some previous thing.
564*412f47f9SXin Li            if i == 1
565*412f47f9SXin Li                # Special case: we don't store a best zero-length
566*412f47f9SXin Li                # sequence :-)
567*412f47f9SXin Li                candidate = (abs(y), [(x,y)])
568*412f47f9SXin Li            else
569*412f47f9SXin Li                othersign = 3-sign # map 1->2 and 2->1
570*412f47f9SXin Li                oldscore, oldlist = best[i-1, othersign]
571*412f47f9SXin Li                newscore = min(abs(y), oldscore)
572*412f47f9SXin Li                newlist = vcat(oldlist, [(x,y)])
573*412f47f9SXin Li                candidate = (newscore, newlist)
574*412f47f9SXin Li            end
575*412f47f9SXin Li            # If our new candidate improves on the previous value of
576*412f47f9SXin Li            # best[i,sign], then replace it.
577*412f47f9SXin Li            if candidate[1] > best[i,sign][1]
578*412f47f9SXin Li                best[i,sign] = candidate
579*412f47f9SXin Li            end
580*412f47f9SXin Li        end
581*412f47f9SXin Li    end
582*412f47f9SXin Li
583*412f47f9SXin Li    # Our ultimate return value has to be either best[n,1] or
584*412f47f9SXin Li    # best[n,2], but it could be either. See which one has the higher
585*412f47f9SXin Li    # score.
586*412f47f9SXin Li    if best[n,1][1] > best[n,2][1]
587*412f47f9SXin Li        ret = best[n,1][2]
588*412f47f9SXin Li    else
589*412f47f9SXin Li        ret = best[n,2][2]
590*412f47f9SXin Li    end
591*412f47f9SXin Li    # Make sure we did actually _find_ a good answer.
592*412f47f9SXin Li    @assert(length(ret) == n)
593*412f47f9SXin Li    return ret
594*412f47f9SXin Liend
595*412f47f9SXin Li
596*412f47f9SXin Li# ----------------------------------------------------------------------
597*412f47f9SXin Li# Construct a rational-function approximation with equal and
598*412f47f9SXin Li# alternating weighted deviation at a specific set of x-coordinates.
599*412f47f9SXin Li
600*412f47f9SXin Li# Arguments:
601*412f47f9SXin Li#    f         The function to be approximated. Maps BigFloat -> BigFloat.
602*412f47f9SXin Li#    coords    An array of BigFloats giving the x-coordinates. There should
603*412f47f9SXin Li#              be n+d+2 of them.
604*412f47f9SXin Li#    n, d      The degrees of the numerator and denominator of the desired
605*412f47f9SXin Li#              approximation.
606*412f47f9SXin Li#    prev_err  A plausible value for the alternating weighted deviation.
607*412f47f9SXin Li#              (Required to kickstart a binary search in the nonlinear case;
608*412f47f9SXin Li#              see comments below.)
609*412f47f9SXin Li#    w         Error-weighting function. Takes two BigFloat arguments x,y
610*412f47f9SXin Li#              and returns a scaling factor for the error at that location.
611*412f47f9SXin Li#              The returned approximation R should have the minimum possible
612*412f47f9SXin Li#              maximum value of abs((f(x)-R(x)) * w(x,f(x))). Optional
613*412f47f9SXin Li#              parameter, defaulting to the always-return-1 function.
614*412f47f9SXin Li#
615*412f47f9SXin Li# Return values: a pair of arrays of BigFloats (N,D) giving the
616*412f47f9SXin Li# coefficients of the returned rational function. N has size n+1; D
617*412f47f9SXin Li# has size d+1. Both start with the constant term, i.e. N[i] is the
618*412f47f9SXin Li# coefficient of x^(i-1) (because Julia arrays are 1-based). D[1] will
619*412f47f9SXin Li# be 1.
620*412f47f9SXin Lifunction ratfn_equal_deviation(f::Function, coords::Array{BigFloat},
621*412f47f9SXin Li                               n, d, prev_err::BigFloat,
622*412f47f9SXin Li                               w = (x,y)->BigFloat(1))
623*412f47f9SXin Li    @debug("equaldev", "n=", n, " d=", d, " coords=", repr(coords))
624*412f47f9SXin Li    @assert(length(coords) == n+d+2)
625*412f47f9SXin Li
626*412f47f9SXin Li    if d == 0
627*412f47f9SXin Li        # Special case: we're after a polynomial. In this case, we
628*412f47f9SXin Li        # have the particularly easy job of just constructing and
629*412f47f9SXin Li        # solving a system of n+2 linear equations, to find the n+1
630*412f47f9SXin Li        # coefficients of the polynomial and also the amount of
631*412f47f9SXin Li        # deviation at the specified coordinates. Each equation is of
632*412f47f9SXin Li        # the form
633*412f47f9SXin Li        #
634*412f47f9SXin Li        #   p_0 x^0 + p_1 x^1 + ... + p_n x^n ± e/w(x) = f(x)
635*412f47f9SXin Li        #
636*412f47f9SXin Li        # in which the p_i and e are the variables, and the powers of
637*412f47f9SXin Li        # x and calls to w and f are the coefficients.
638*412f47f9SXin Li
639*412f47f9SXin Li        matrix = array2d(BigFloat, n+2, n+2)
640*412f47f9SXin Li        vector = array1d(BigFloat, n+2)
641*412f47f9SXin Li        currsign = +1
642*412f47f9SXin Li        for i = 1:1:n+2
643*412f47f9SXin Li            x = coords[i]
644*412f47f9SXin Li            for j = 0:1:n
645*412f47f9SXin Li                matrix[i,1+j] = x^j
646*412f47f9SXin Li            end
647*412f47f9SXin Li            y = f(x)
648*412f47f9SXin Li            vector[i] = y
649*412f47f9SXin Li            matrix[i, n+2] = currsign / w(x,y)
650*412f47f9SXin Li            currsign = -currsign
651*412f47f9SXin Li        end
652*412f47f9SXin Li
653*412f47f9SXin Li        @debug("equaldev", "matrix=", repr(matrix))
654*412f47f9SXin Li        @debug("equaldev", "vector=", repr(vector))
655*412f47f9SXin Li
656*412f47f9SXin Li        outvector = matrix \ vector
657*412f47f9SXin Li
658*412f47f9SXin Li        @debug("equaldev", "outvector=", repr(outvector))
659*412f47f9SXin Li
660*412f47f9SXin Li        ncoeffs = outvector[1:n+1]
661*412f47f9SXin Li        dcoeffs = [BigFloat(1)]
662*412f47f9SXin Li        return ncoeffs, dcoeffs
663*412f47f9SXin Li    else
664*412f47f9SXin Li        # For a nontrivial rational function, the system of equations
665*412f47f9SXin Li        # we need to solve becomes nonlinear, because each equation
666*412f47f9SXin Li        # now takes the form
667*412f47f9SXin Li        #
668*412f47f9SXin Li        #   p_0 x^0 + p_1 x^1 + ... + p_n x^n
669*412f47f9SXin Li        #   --------------------------------- ± e/w(x) = f(x)
670*412f47f9SXin Li        #     x^0 + q_1 x^1 + ... + q_d x^d
671*412f47f9SXin Li        #
672*412f47f9SXin Li        # and multiplying up by the denominator gives you a lot of
673*412f47f9SXin Li        # terms containing e × q_i. So we can't do this the really
674*412f47f9SXin Li        # easy way using a matrix equation as above.
675*412f47f9SXin Li        #
676*412f47f9SXin Li        # Fortunately, this is a fairly easy kind of nonlinear system.
677*412f47f9SXin Li        # The equations all become linear if you switch to treating e
678*412f47f9SXin Li        # as a constant, so a reasonably sensible approach is to pick
679*412f47f9SXin Li        # a candidate value of e, solve all but one of the equations
680*412f47f9SXin Li        # for the remaining unknowns, and then see what the error
681*412f47f9SXin Li        # turns out to be in the final equation. The Chebyshev
682*412f47f9SXin Li        # alternation theorem guarantees that that error in the last
683*412f47f9SXin Li        # equation will be anti-monotonic in the input e, so we can
684*412f47f9SXin Li        # just binary-search until we get the two as close to equal as
685*412f47f9SXin Li        # we need them.
686*412f47f9SXin Li
687*412f47f9SXin Li        function try_e(e)
688*412f47f9SXin Li            # Try a given value of e, derive the coefficients of the
689*412f47f9SXin Li            # resulting rational function by setting up equations
690*412f47f9SXin Li            # based on the first n+d+1 of the n+d+2 coordinates, and
691*412f47f9SXin Li            # see what the error turns out to be at the final
692*412f47f9SXin Li            # coordinate.
693*412f47f9SXin Li            matrix = array2d(BigFloat, n+d+1, n+d+1)
694*412f47f9SXin Li            vector = array1d(BigFloat, n+d+1)
695*412f47f9SXin Li            currsign = +1
696*412f47f9SXin Li            for i = 1:1:n+d+1
697*412f47f9SXin Li                x = coords[i]
698*412f47f9SXin Li                y = f(x)
699*412f47f9SXin Li                y_adj = y - currsign * e / w(x,y)
700*412f47f9SXin Li                for j = 0:1:n
701*412f47f9SXin Li                    matrix[i,1+j] = x^j
702*412f47f9SXin Li                end
703*412f47f9SXin Li                for j = 1:1:d
704*412f47f9SXin Li                    matrix[i,1+n+j] = -x^j * y_adj
705*412f47f9SXin Li                end
706*412f47f9SXin Li                vector[i] = y_adj
707*412f47f9SXin Li                currsign = -currsign
708*412f47f9SXin Li            end
709*412f47f9SXin Li
710*412f47f9SXin Li            @debug("equaldev", "trying e=", e)
711*412f47f9SXin Li            @debug("equaldev", "matrix=", repr(matrix))
712*412f47f9SXin Li            @debug("equaldev", "vector=", repr(vector))
713*412f47f9SXin Li
714*412f47f9SXin Li            outvector = matrix \ vector
715*412f47f9SXin Li
716*412f47f9SXin Li            @debug("equaldev", "outvector=", repr(outvector))
717*412f47f9SXin Li
718*412f47f9SXin Li            ncoeffs = outvector[1:n+1]
719*412f47f9SXin Li            dcoeffs = vcat([BigFloat(1)], outvector[n+2:n+d+1])
720*412f47f9SXin Li
721*412f47f9SXin Li            x = coords[n+d+2]
722*412f47f9SXin Li            y = f(x)
723*412f47f9SXin Li            last_e = (ratfn_eval(ncoeffs, dcoeffs, x) - y) * w(x,y) * -currsign
724*412f47f9SXin Li
725*412f47f9SXin Li            @debug("equaldev", "last e=", last_e)
726*412f47f9SXin Li
727*412f47f9SXin Li            return ncoeffs, dcoeffs, last_e
728*412f47f9SXin Li        end
729*412f47f9SXin Li
730*412f47f9SXin Li        threshold = 2^(-epsbits/2) # convergence threshold
731*412f47f9SXin Li
732*412f47f9SXin Li        # Start by trying our previous iteration's error value. This
733*412f47f9SXin Li        # value (e0) will be one end of our binary-search interval,
734*412f47f9SXin Li        # and whatever it caused the last point's error to be, that
735*412f47f9SXin Li        # (e1) will be the other end.
736*412f47f9SXin Li        e0 = prev_err
737*412f47f9SXin Li        @debug("equaldev", "e0 = ", e0)
738*412f47f9SXin Li        nc, dc, e1 = try_e(e0)
739*412f47f9SXin Li        @debug("equaldev", "e1 = ", e1)
740*412f47f9SXin Li        if abs(e1-e0) <= threshold
741*412f47f9SXin Li            # If we're _really_ lucky, we hit the error right on the
742*412f47f9SXin Li            # nose just by doing that!
743*412f47f9SXin Li            return nc, dc
744*412f47f9SXin Li        end
745*412f47f9SXin Li        s = sign(e1-e0)
746*412f47f9SXin Li        @debug("equaldev", "s = ", s)
747*412f47f9SXin Li
748*412f47f9SXin Li        # Verify by assertion that trying our other interval endpoint
749*412f47f9SXin Li        # e1 gives a value that's wrong in the other direction.
750*412f47f9SXin Li        # (Otherwise our binary search won't get a sensible answer at
751*412f47f9SXin Li        # all.)
752*412f47f9SXin Li        nc, dc, e2 = try_e(e1)
753*412f47f9SXin Li        @debug("equaldev", "e2 = ", e2)
754*412f47f9SXin Li        @assert(sign(e2-e1) == -s)
755*412f47f9SXin Li
756*412f47f9SXin Li        # Now binary-search until our two endpoints narrow enough.
757*412f47f9SXin Li        local emid
758*412f47f9SXin Li        while abs(e1-e0) > threshold
759*412f47f9SXin Li            emid = (e1+e0)/2
760*412f47f9SXin Li            nc, dc, enew = try_e(emid)
761*412f47f9SXin Li            if sign(enew-emid) == s
762*412f47f9SXin Li                e0 = emid
763*412f47f9SXin Li            else
764*412f47f9SXin Li                e1 = emid
765*412f47f9SXin Li            end
766*412f47f9SXin Li        end
767*412f47f9SXin Li
768*412f47f9SXin Li        @debug("equaldev", "final e=", emid)
769*412f47f9SXin Li        return nc, dc
770*412f47f9SXin Li    end
771*412f47f9SXin Liend
772*412f47f9SXin Li
773*412f47f9SXin Li# ----------------------------------------------------------------------
774*412f47f9SXin Li# Top-level function to find a minimax rational-function approximation.
775*412f47f9SXin Li
776*412f47f9SXin Li# Arguments:
777*412f47f9SXin Li#    f         The function to be approximated. Maps BigFloat -> BigFloat.
778*412f47f9SXin Li#    interval  A pair of BigFloats giving the endpoints of the interval
779*412f47f9SXin Li#              (in either order) on which to approximate f.
780*412f47f9SXin Li#    n, d      The degrees of the numerator and denominator of the desired
781*412f47f9SXin Li#              approximation.
782*412f47f9SXin Li#    w         Error-weighting function. Takes two BigFloat arguments x,y
783*412f47f9SXin Li#              and returns a scaling factor for the error at that location.
784*412f47f9SXin Li#              The returned approximation R should have the minimum possible
785*412f47f9SXin Li#              maximum value of abs((f(x)-R(x)) * w(x,f(x))). Optional
786*412f47f9SXin Li#              parameter, defaulting to the always-return-1 function.
787*412f47f9SXin Li#
788*412f47f9SXin Li# Return values: a tuple (N,D,E,X), where
789*412f47f9SXin Li
790*412f47f9SXin Li#    N,D       A pair of arrays of BigFloats giving the coefficients
791*412f47f9SXin Li#              of the returned rational function. N has size n+1; D
792*412f47f9SXin Li#              has size d+1. Both start with the constant term, i.e.
793*412f47f9SXin Li#              N[i] is the coefficient of x^(i-1) (because Julia
794*412f47f9SXin Li#              arrays are 1-based). D[1] will be 1.
795*412f47f9SXin Li#    E         The maximum weighted error (BigFloat).
796*412f47f9SXin Li#    X         An array of pairs of BigFloats giving the locations of n+2
797*412f47f9SXin Li#              points and the weighted error at each of those points. The
798*412f47f9SXin Li#              weighted error values will have alternating signs, which
799*412f47f9SXin Li#              means that the Chebyshev alternation theorem guarantees
800*412f47f9SXin Li#              that any other function of the same degree must exceed
801*412f47f9SXin Li#              the error of this one at at least one of those points.
802*412f47f9SXin Lifunction ratfn_minimax(f::Function, interval::Tuple{BigFloat,BigFloat}, n, d,
803*412f47f9SXin Li                       w = (x,y)->BigFloat(1))
804*412f47f9SXin Li    # We start off by finding a least-squares approximation. This
805*412f47f9SXin Li    # doesn't need to be perfect, but if we can get it reasonably good
806*412f47f9SXin Li    # then it'll save iterations in the refining stage.
807*412f47f9SXin Li    #
808*412f47f9SXin Li    # Least-squares approximations tend to look nicer in a minimax
809*412f47f9SXin Li    # sense if you evaluate the function at a big pile of Chebyshev
810*412f47f9SXin Li    # nodes rather than uniformly spaced points. These values will
811*412f47f9SXin Li    # also make a good grid to use for the initial search for error
812*412f47f9SXin Li    # extrema, so we'll keep them around for that reason too.
813*412f47f9SXin Li
814*412f47f9SXin Li    # Construct the grid.
815*412f47f9SXin Li    lo, hi = minimum(interval), maximum(interval)
816*412f47f9SXin Li    local grid
817*412f47f9SXin Li    let
818*412f47f9SXin Li        mid = (hi+lo)/2
819*412f47f9SXin Li        halfwid = (hi-lo)/2
820*412f47f9SXin Li        nnodes = 16 * (n+d+1)
821*412f47f9SXin Li        pi = 2*asin(BigFloat(1))
822*412f47f9SXin Li        grid = [ mid - halfwid * cos(pi*i/nnodes) for i=0:1:nnodes ]
823*412f47f9SXin Li    end
824*412f47f9SXin Li
825*412f47f9SXin Li    # Find the initial least-squares approximation.
826*412f47f9SXin Li    (nc, dc) = ratfn_leastsquares(f, grid, n, d, w)
827*412f47f9SXin Li    @debug("minimax", "initial leastsquares approx = ",
828*412f47f9SXin Li           ratfn_to_string(nc, dc))
829*412f47f9SXin Li
830*412f47f9SXin Li    # Threshold of convergence. We stop when the relative difference
831*412f47f9SXin Li    # between the min and max (winnowed) error extrema is less than
832*412f47f9SXin Li    # this.
833*412f47f9SXin Li    #
834*412f47f9SXin Li    # This is set to the cube root of machine epsilon on a more or
835*412f47f9SXin Li    # less empirical basis, because the rational-function case will
836*412f47f9SXin Li    # not converge reliably if you set it to only the square root.
837*412f47f9SXin Li    # (Repeatable by using the --test mode.) On the assumption that
838*412f47f9SXin Li    # input and output error in each iteration can be expected to be
839*412f47f9SXin Li    # related by a simple power law (because it'll just be down to how
840*412f47f9SXin Li    # many leading terms of a Taylor series are zero), the cube root
841*412f47f9SXin Li    # was the next thing to try.
842*412f47f9SXin Li    threshold = 2^(-epsbits/3)
843*412f47f9SXin Li
844*412f47f9SXin Li    # Main loop.
845*412f47f9SXin Li    while true
846*412f47f9SXin Li        # Find all the error extrema we can.
847*412f47f9SXin Li        function compute_error(x)
848*412f47f9SXin Li            real_y = f(x)
849*412f47f9SXin Li            approx_y = ratfn_eval(nc, dc, x)
850*412f47f9SXin Li            return (approx_y - real_y) * w(x, real_y)
851*412f47f9SXin Li        end
852*412f47f9SXin Li        extrema = find_extrema(compute_error, grid)
853*412f47f9SXin Li        @debug("minimax", "all extrema = ", format_xylist(extrema))
854*412f47f9SXin Li
855*412f47f9SXin Li        # Winnow the extrema down to the right number, and ensure they
856*412f47f9SXin Li        # have alternating sign.
857*412f47f9SXin Li        extrema = winnow_extrema(extrema, n+d+2)
858*412f47f9SXin Li        @debug("minimax", "winnowed extrema = ", format_xylist(extrema))
859*412f47f9SXin Li
860*412f47f9SXin Li        # See if we've finished.
861*412f47f9SXin Li        min_err = minimum([abs(y) for (x,y) = extrema])
862*412f47f9SXin Li        max_err = maximum([abs(y) for (x,y) = extrema])
863*412f47f9SXin Li        variation = (max_err - min_err) / max_err
864*412f47f9SXin Li        @debug("minimax", "extremum variation = ", variation)
865*412f47f9SXin Li        if variation < threshold
866*412f47f9SXin Li            @debug("minimax", "done!")
867*412f47f9SXin Li            return nc, dc, max_err, extrema
868*412f47f9SXin Li        end
869*412f47f9SXin Li
870*412f47f9SXin Li        # If not, refine our function by equalising the error at the
871*412f47f9SXin Li        # extrema points, and go round again.
872*412f47f9SXin Li        (nc, dc) = ratfn_equal_deviation(f, map(x->x[1], extrema),
873*412f47f9SXin Li                                         n, d, max_err, w)
874*412f47f9SXin Li        @debug("minimax", "refined approx = ", ratfn_to_string(nc, dc))
875*412f47f9SXin Li    end
876*412f47f9SXin Liend
877*412f47f9SXin Li
878*412f47f9SXin Li# ----------------------------------------------------------------------
879*412f47f9SXin Li# Check if a polynomial is well-conditioned for accurate evaluation in
880*412f47f9SXin Li# a given interval by Horner's rule.
881*412f47f9SXin Li#
882*412f47f9SXin Li# This is true if at every step where Horner's rule computes
883*412f47f9SXin Li# (coefficient + x*value_so_far), the constant coefficient you're
884*412f47f9SXin Li# adding on is of larger magnitude than the x*value_so_far operand.
885*412f47f9SXin Li# And this has to be true for every x in the interval.
886*412f47f9SXin Li#
887*412f47f9SXin Li# Arguments:
888*412f47f9SXin Li#    coeffs    The coefficients of the polynomial under test. Starts with
889*412f47f9SXin Li#              the constant term, i.e. coeffs[i] is the coefficient of
890*412f47f9SXin Li#              x^(i-1) (because Julia arrays are 1-based).
891*412f47f9SXin Li#    lo, hi    The bounds of the interval.
892*412f47f9SXin Li#
893*412f47f9SXin Li# Return value: the largest ratio (x*value_so_far / coefficient), at
894*412f47f9SXin Li# any step of evaluation, for any x in the interval. If this is less
895*412f47f9SXin Li# than 1, the polynomial is at least somewhat well-conditioned;
896*412f47f9SXin Li# ideally you want it to be more like 1/8 or 1/16 or so, so that the
897*412f47f9SXin Li# relative rounding error accumulated at each step are reduced by
898*412f47f9SXin Li# several factors of 2 when the next coefficient is added on.
899*412f47f9SXin Li
900*412f47f9SXin Lifunction wellcond(coeffs, lo, hi)
901*412f47f9SXin Li    x = max(abs(lo), abs(hi))
902*412f47f9SXin Li    worst = 0
903*412f47f9SXin Li    so_far = 0
904*412f47f9SXin Li    for i = length(coeffs):-1:1
905*412f47f9SXin Li        coeff = abs(coeffs[i])
906*412f47f9SXin Li        so_far *= x
907*412f47f9SXin Li        if coeff != 0
908*412f47f9SXin Li            thisval = so_far / coeff
909*412f47f9SXin Li            worst = max(worst, thisval)
910*412f47f9SXin Li            so_far += coeff
911*412f47f9SXin Li        end
912*412f47f9SXin Li    end
913*412f47f9SXin Li    return worst
914*412f47f9SXin Liend
915*412f47f9SXin Li
916*412f47f9SXin Li# ----------------------------------------------------------------------
917*412f47f9SXin Li# Small set of unit tests.
918*412f47f9SXin Li
919*412f47f9SXin Lifunction test()
920*412f47f9SXin Li    passes = 0
921*412f47f9SXin Li    fails = 0
922*412f47f9SXin Li
923*412f47f9SXin Li    function approx_eq(x, y, limit=1e-6)
924*412f47f9SXin Li        return abs(x - y) < limit
925*412f47f9SXin Li    end
926*412f47f9SXin Li
927*412f47f9SXin Li    function test(condition)
928*412f47f9SXin Li        if condition
929*412f47f9SXin Li            passes += 1
930*412f47f9SXin Li        else
931*412f47f9SXin Li            println("fail")
932*412f47f9SXin Li            fails += 1
933*412f47f9SXin Li        end
934*412f47f9SXin Li    end
935*412f47f9SXin Li
936*412f47f9SXin Li    # Test Gaussian elimination.
937*412f47f9SXin Li    println("Gaussian test 1:")
938*412f47f9SXin Li    m = BigFloat[1 1 2; 3 5 8; 13 34 21]
939*412f47f9SXin Li    v = BigFloat[1, -1, 2]
940*412f47f9SXin Li    ret = m \ v
941*412f47f9SXin Li    println("  ",repr(ret))
942*412f47f9SXin Li    test(approx_eq(ret[1], 109/26))
943*412f47f9SXin Li    test(approx_eq(ret[2], -105/130))
944*412f47f9SXin Li    test(approx_eq(ret[3], -31/26))
945*412f47f9SXin Li
946*412f47f9SXin Li    # Test leastsquares rational functions.
947*412f47f9SXin Li    println("Leastsquares test 1:")
948*412f47f9SXin Li    n = 10000
949*412f47f9SXin Li    a = array1d(BigFloat, n+1)
950*412f47f9SXin Li    for i = 0:1:n
951*412f47f9SXin Li        a[1+i] = i/BigFloat(n)
952*412f47f9SXin Li    end
953*412f47f9SXin Li    (nc, dc) = ratfn_leastsquares(x->exp(x), a, 2, 2)
954*412f47f9SXin Li    println("  ",ratfn_to_string(nc, dc))
955*412f47f9SXin Li    for x = a
956*412f47f9SXin Li        test(approx_eq(exp(x), ratfn_eval(nc, dc, x), 1e-4))
957*412f47f9SXin Li    end
958*412f47f9SXin Li
959*412f47f9SXin Li    # Test golden section search.
960*412f47f9SXin Li    println("Golden section test 1:")
961*412f47f9SXin Li    x, y = goldensection(x->sin(x),
962*412f47f9SXin Li                              BigFloat(0), BigFloat(1)/10, BigFloat(4))
963*412f47f9SXin Li    println("  ", x, " -> ", y)
964*412f47f9SXin Li    test(approx_eq(x, asin(BigFloat(1))))
965*412f47f9SXin Li    test(approx_eq(y, 1))
966*412f47f9SXin Li
967*412f47f9SXin Li    # Test extrema-winnowing algorithm.
968*412f47f9SXin Li    println("Winnow test 1:")
969*412f47f9SXin Li    extrema = [(x, sin(20*x)*sin(197*x))
970*412f47f9SXin Li               for x in BigFloat(0):BigFloat(1)/1000:BigFloat(1)]
971*412f47f9SXin Li    winnowed = winnow_extrema(extrema, 12)
972*412f47f9SXin Li    println("  ret = ", format_xylist(winnowed))
973*412f47f9SXin Li    prevx, prevy = -1, 0
974*412f47f9SXin Li    for (x,y) = winnowed
975*412f47f9SXin Li        test(x > prevx)
976*412f47f9SXin Li        test(y != 0)
977*412f47f9SXin Li        test(prevy * y <= 0) # tolerates initial prevx having no sign
978*412f47f9SXin Li        test(abs(y) > 0.9)
979*412f47f9SXin Li        prevx, prevy = x, y
980*412f47f9SXin Li    end
981*412f47f9SXin Li
982*412f47f9SXin Li    # Test actual minimax approximation.
983*412f47f9SXin Li    println("Minimax test 1 (polynomial):")
984*412f47f9SXin Li    (nc, dc, e, x) = ratfn_minimax(x->exp(x), (BigFloat(0), BigFloat(1)), 4, 0)
985*412f47f9SXin Li    println("  ",e)
986*412f47f9SXin Li    println("  ",ratfn_to_string(nc, dc))
987*412f47f9SXin Li    test(0 < e < 1e-3)
988*412f47f9SXin Li    for x = 0:BigFloat(1)/1000:1
989*412f47f9SXin Li        test(abs(ratfn_eval(nc, dc, x) - exp(x)) <= e * 1.0000001)
990*412f47f9SXin Li    end
991*412f47f9SXin Li
992*412f47f9SXin Li    println("Minimax test 2 (rational):")
993*412f47f9SXin Li    (nc, dc, e, x) = ratfn_minimax(x->exp(x), (BigFloat(0), BigFloat(1)), 2, 2)
994*412f47f9SXin Li    println("  ",e)
995*412f47f9SXin Li    println("  ",ratfn_to_string(nc, dc))
996*412f47f9SXin Li    test(0 < e < 1e-3)
997*412f47f9SXin Li    for x = 0:BigFloat(1)/1000:1
998*412f47f9SXin Li        test(abs(ratfn_eval(nc, dc, x) - exp(x)) <= e * 1.0000001)
999*412f47f9SXin Li    end
1000*412f47f9SXin Li
1001*412f47f9SXin Li    println("Minimax test 3 (polynomial, weighted):")
1002*412f47f9SXin Li    (nc, dc, e, x) = ratfn_minimax(x->exp(x), (BigFloat(0), BigFloat(1)), 4, 0,
1003*412f47f9SXin Li                                   (x,y)->1/y)
1004*412f47f9SXin Li    println("  ",e)
1005*412f47f9SXin Li    println("  ",ratfn_to_string(nc, dc))
1006*412f47f9SXin Li    test(0 < e < 1e-3)
1007*412f47f9SXin Li    for x = 0:BigFloat(1)/1000:1
1008*412f47f9SXin Li        test(abs(ratfn_eval(nc, dc, x) - exp(x))/exp(x) <= e * 1.0000001)
1009*412f47f9SXin Li    end
1010*412f47f9SXin Li
1011*412f47f9SXin Li    println("Minimax test 4 (rational, weighted):")
1012*412f47f9SXin Li    (nc, dc, e, x) = ratfn_minimax(x->exp(x), (BigFloat(0), BigFloat(1)), 2, 2,
1013*412f47f9SXin Li                                   (x,y)->1/y)
1014*412f47f9SXin Li    println("  ",e)
1015*412f47f9SXin Li    println("  ",ratfn_to_string(nc, dc))
1016*412f47f9SXin Li    test(0 < e < 1e-3)
1017*412f47f9SXin Li    for x = 0:BigFloat(1)/1000:1
1018*412f47f9SXin Li        test(abs(ratfn_eval(nc, dc, x) - exp(x))/exp(x) <= e * 1.0000001)
1019*412f47f9SXin Li    end
1020*412f47f9SXin Li
1021*412f47f9SXin Li    println("Minimax test 5 (rational, weighted, odd degree):")
1022*412f47f9SXin Li    (nc, dc, e, x) = ratfn_minimax(x->exp(x), (BigFloat(0), BigFloat(1)), 2, 1,
1023*412f47f9SXin Li                                   (x,y)->1/y)
1024*412f47f9SXin Li    println("  ",e)
1025*412f47f9SXin Li    println("  ",ratfn_to_string(nc, dc))
1026*412f47f9SXin Li    test(0 < e < 1e-3)
1027*412f47f9SXin Li    for x = 0:BigFloat(1)/1000:1
1028*412f47f9SXin Li        test(abs(ratfn_eval(nc, dc, x) - exp(x))/exp(x) <= e * 1.0000001)
1029*412f47f9SXin Li    end
1030*412f47f9SXin Li
1031*412f47f9SXin Li    total = passes + fails
1032*412f47f9SXin Li    println(passes, " passes ", fails, " fails ", total, " total")
1033*412f47f9SXin Liend
1034*412f47f9SXin Li
1035*412f47f9SXin Li# ----------------------------------------------------------------------
1036*412f47f9SXin Li# Online help.
1037*412f47f9SXin Lifunction help()
1038*412f47f9SXin Li    print("""
1039*412f47f9SXin LiUsage:
1040*412f47f9SXin Li
1041*412f47f9SXin Li    remez.jl [options] <lo> <hi> <n> <d> <expr> [<weight>]
1042*412f47f9SXin Li
1043*412f47f9SXin LiArguments:
1044*412f47f9SXin Li
1045*412f47f9SXin Li    <lo>, <hi>
1046*412f47f9SXin Li
1047*412f47f9SXin Li        Bounds of the interval on which to approximate the target
1048*412f47f9SXin Li        function. These are parsed and evaluated as Julia expressions,
1049*412f47f9SXin Li        so you can write things like '1/BigFloat(6)' to get an
1050*412f47f9SXin Li        accurate representation of 1/6, or '4*atan(BigFloat(1))' to
1051*412f47f9SXin Li        get pi. (Unfortunately, the obvious 'BigFloat(pi)' doesn't
1052*412f47f9SXin Li        work in Julia.)
1053*412f47f9SXin Li
1054*412f47f9SXin Li    <n>, <d>
1055*412f47f9SXin Li
1056*412f47f9SXin Li        The desired degree of polynomial(s) you want for your
1057*412f47f9SXin Li        approximation. These should be non-negative integers. If you
1058*412f47f9SXin Li        want a rational function as output, set <n> to the degree of
1059*412f47f9SXin Li        the numerator, and <d> the denominator. If you just want an
1060*412f47f9SXin Li        ordinary polynomial, set <d> to 0, and <n> to the degree of
1061*412f47f9SXin Li        the polynomial you want.
1062*412f47f9SXin Li
1063*412f47f9SXin Li    <expr>
1064*412f47f9SXin Li
1065*412f47f9SXin Li        A Julia expression giving the function to be approximated on
1066*412f47f9SXin Li        the interval. The input value is predefined as 'x' when this
1067*412f47f9SXin Li        expression is evaluated, so you should write something along
1068*412f47f9SXin Li        the lines of 'sin(x)' or 'sqrt(1+tan(x)^2)' etc.
1069*412f47f9SXin Li
1070*412f47f9SXin Li    <weight>
1071*412f47f9SXin Li
1072*412f47f9SXin Li        If provided, a Julia expression giving the weighting factor
1073*412f47f9SXin Li        for the approximation error. The output polynomial will
1074*412f47f9SXin Li        minimise the largest absolute value of (P-f) * w at any point
1075*412f47f9SXin Li        in the interval, where P is the value of the polynomial, f is
1076*412f47f9SXin Li        the value of the target function given by <expr>, and w is the
1077*412f47f9SXin Li        weight given by this function.
1078*412f47f9SXin Li
1079*412f47f9SXin Li        When this expression is evaluated, the input value to P and f
1080*412f47f9SXin Li        is predefined as 'x', and also the true output value f(x) is
1081*412f47f9SXin Li        predefined as 'y'. So you can minimise the relative error by
1082*412f47f9SXin Li        simply writing '1/y'.
1083*412f47f9SXin Li
1084*412f47f9SXin Li        If the <weight> argument is not provided, the default
1085*412f47f9SXin Li        weighting function always returns 1, so that the polynomial
1086*412f47f9SXin Li        will minimise the maximum absolute error |P-f|.
1087*412f47f9SXin Li
1088*412f47f9SXin LiComputation options:
1089*412f47f9SXin Li
1090*412f47f9SXin Li    --pre=<predef_expr>
1091*412f47f9SXin Li
1092*412f47f9SXin Li        Evaluate the Julia expression <predef_expr> before starting
1093*412f47f9SXin Li        the computation. This permits you to pre-define variables or
1094*412f47f9SXin Li        functions which the Julia expressions in your main arguments
1095*412f47f9SXin Li        can refer to. All of <lo>, <hi>, <expr> and <weight> can make
1096*412f47f9SXin Li        use of things defined by <predef_expr>.
1097*412f47f9SXin Li
1098*412f47f9SXin Li        One internal remez.jl function that you might sometimes find
1099*412f47f9SXin Li        useful in this expression is 'goldensection', which finds the
1100*412f47f9SXin Li        location and value of a maximum of a function. For example,
1101*412f47f9SXin Li        one implementation strategy for the gamma function involves
1102*412f47f9SXin Li        translating it to put its unique local minimum at the origin,
1103*412f47f9SXin Li        in which case you can write something like this
1104*412f47f9SXin Li
1105*412f47f9SXin Li            --pre='(m,my) = goldensection(x -> -gamma(x),
1106*412f47f9SXin Li                  BigFloat(1), BigFloat(1.5), BigFloat(2))'
1107*412f47f9SXin Li
1108*412f47f9SXin Li        to predefine 'm' as the location of gamma's minimum, and 'my'
1109*412f47f9SXin Li        as the (negated) value that gamma actually takes at that
1110*412f47f9SXin Li        point, i.e. -gamma(m).
1111*412f47f9SXin Li
1112*412f47f9SXin Li        (Since 'goldensection' always finds a maximum, we had to
1113*412f47f9SXin Li        negate gamma in the input function to make it find a minimum
1114*412f47f9SXin Li        instead. Consult the comments in the source for more details
1115*412f47f9SXin Li        on the use of this function.)
1116*412f47f9SXin Li
1117*412f47f9SXin Li        If you use this option more than once, all the expressions you
1118*412f47f9SXin Li        provide will be run in sequence.
1119*412f47f9SXin Li
1120*412f47f9SXin Li    --bits=<bits>
1121*412f47f9SXin Li
1122*412f47f9SXin Li        Specify the accuracy to which you want the output polynomial,
1123*412f47f9SXin Li        in bits. Default 256, which should be more than enough.
1124*412f47f9SXin Li
1125*412f47f9SXin Li    --bigfloatbits=<bits>
1126*412f47f9SXin Li
1127*412f47f9SXin Li        Turn up the precision used by Julia for its BigFloat
1128*412f47f9SXin Li        evaluation. Default is Julia's default (also 256). You might
1129*412f47f9SXin Li        want to try setting this higher than the --bits value if the
1130*412f47f9SXin Li        algorithm is failing to converge for some reason.
1131*412f47f9SXin Li
1132*412f47f9SXin LiOutput options:
1133*412f47f9SXin Li
1134*412f47f9SXin Li    --full
1135*412f47f9SXin Li
1136*412f47f9SXin Li        Instead of just printing the approximation function itself,
1137*412f47f9SXin Li        also print auxiliary information:
1138*412f47f9SXin Li         - the locations of the error extrema, and the actual
1139*412f47f9SXin Li           (weighted) error at each of those locations
1140*412f47f9SXin Li         - the overall maximum error of the function
1141*412f47f9SXin Li         - a 'well-conditioning quotient', giving the worst-case ratio
1142*412f47f9SXin Li           between any polynomial coefficient and the largest possible
1143*412f47f9SXin Li           value of the higher-order terms it will be added to.
1144*412f47f9SXin Li
1145*412f47f9SXin Li        The well-conditioning quotient should be less than 1, ideally
1146*412f47f9SXin Li        by several factors of two, for accurate evaluation in the
1147*412f47f9SXin Li        target precision. If you request a rational function, a
1148*412f47f9SXin Li        separate well-conditioning quotient will be printed for the
1149*412f47f9SXin Li        numerator and denominator.
1150*412f47f9SXin Li
1151*412f47f9SXin Li        Use this option when deciding how wide an interval to
1152*412f47f9SXin Li        approximate your function on, and what degree of polynomial
1153*412f47f9SXin Li        you need.
1154*412f47f9SXin Li
1155*412f47f9SXin Li    --variable=<identifier>
1156*412f47f9SXin Li
1157*412f47f9SXin Li        When writing the output polynomial or rational function in its
1158*412f47f9SXin Li        usual form as an arithmetic expression, use <identifier> as
1159*412f47f9SXin Li        the name of the input variable. Default is 'x'.
1160*412f47f9SXin Li
1161*412f47f9SXin Li    --suffix=<suffix>
1162*412f47f9SXin Li
1163*412f47f9SXin Li        When writing the output polynomial or rational function in its
1164*412f47f9SXin Li        usual form as an arithmetic expression, write <suffix> after
1165*412f47f9SXin Li        every floating-point literal. For example, '--suffix=F' will
1166*412f47f9SXin Li        generate a C expression in which the coefficients are literals
1167*412f47f9SXin Li        of type 'float' rather than 'double'.
1168*412f47f9SXin Li
1169*412f47f9SXin Li    --array
1170*412f47f9SXin Li
1171*412f47f9SXin Li        Instead of writing the output polynomial as an arithmetic
1172*412f47f9SXin Li        expression in Horner's rule form, write out just its
1173*412f47f9SXin Li        coefficients, one per line, each with a trailing comma.
1174*412f47f9SXin Li        Suitable for pasting into a C array declaration.
1175*412f47f9SXin Li
1176*412f47f9SXin Li        This option is not currently supported if the output is a
1177*412f47f9SXin Li        rational function, because you'd need two separate arrays for
1178*412f47f9SXin Li        the numerator and denominator coefficients and there's no
1179*412f47f9SXin Li        obviously right way to provide both of those together.
1180*412f47f9SXin Li
1181*412f47f9SXin LiDebug and test options:
1182*412f47f9SXin Li
1183*412f47f9SXin Li    --debug=<facility>
1184*412f47f9SXin Li
1185*412f47f9SXin Li        Enable debugging output from various parts of the Remez
1186*412f47f9SXin Li        calculation. <facility> should be the name of one of the
1187*412f47f9SXin Li        classes of diagnostic output implemented in the program.
1188*412f47f9SXin Li        Useful values include 'gausselim', 'leastsquares',
1189*412f47f9SXin Li        'goldensection', 'equaldev', 'minimax'. This is probably
1190*412f47f9SXin Li        mostly useful to people debugging problems with the script, so
1191*412f47f9SXin Li        consult the source code for more information about what the
1192*412f47f9SXin Li        diagnostic output for each of those facilities will be.
1193*412f47f9SXin Li
1194*412f47f9SXin Li        If you want diagnostics from more than one facility, specify
1195*412f47f9SXin Li        this option multiple times with different arguments.
1196*412f47f9SXin Li
1197*412f47f9SXin Li    --test
1198*412f47f9SXin Li
1199*412f47f9SXin Li        Run remez.jl's internal test suite. No arguments needed.
1200*412f47f9SXin Li
1201*412f47f9SXin LiMiscellaneous options:
1202*412f47f9SXin Li
1203*412f47f9SXin Li    --help
1204*412f47f9SXin Li
1205*412f47f9SXin Li        Display this text and exit. No arguments needed.
1206*412f47f9SXin Li
1207*412f47f9SXin Li""")
1208*412f47f9SXin Liend
1209*412f47f9SXin Li
1210*412f47f9SXin Li# ----------------------------------------------------------------------
1211*412f47f9SXin Li# Main program.
1212*412f47f9SXin Li
1213*412f47f9SXin Lifunction main()
1214*412f47f9SXin Li    nargs = length(argwords)
1215*412f47f9SXin Li    if nargs != 5 && nargs != 6
1216*412f47f9SXin Li        error("usage: remez.jl <lo> <hi> <n> <d> <expr> [<weight>]\n" *
1217*412f47f9SXin Li              "       run 'remez.jl --help' for more help")
1218*412f47f9SXin Li    end
1219*412f47f9SXin Li
1220*412f47f9SXin Li    for preliminary_command in preliminary_commands
1221*412f47f9SXin Li        eval(Meta.parse(preliminary_command))
1222*412f47f9SXin Li    end
1223*412f47f9SXin Li
1224*412f47f9SXin Li    lo = BigFloat(eval(Meta.parse(argwords[1])))
1225*412f47f9SXin Li    hi = BigFloat(eval(Meta.parse(argwords[2])))
1226*412f47f9SXin Li    n = parse(Int,argwords[3])
1227*412f47f9SXin Li    d = parse(Int,argwords[4])
1228*412f47f9SXin Li    f = eval(Meta.parse("x -> " * argwords[5]))
1229*412f47f9SXin Li
1230*412f47f9SXin Li    # Wrap the user-provided function with a function of our own. This
1231*412f47f9SXin Li    # arranges to detect silly FP values (inf,nan) early and diagnose
1232*412f47f9SXin Li    # them sensibly, and also lets us log all evaluations of the
1233*412f47f9SXin Li    # function in case you suspect it's doing the wrong thing at some
1234*412f47f9SXin Li    # special-case point.
1235*412f47f9SXin Li    function func(x)
1236*412f47f9SXin Li        y = run(f,x)
1237*412f47f9SXin Li        @debug("f", x, " -> ", y)
1238*412f47f9SXin Li        if !isfinite(y)
1239*412f47f9SXin Li            error("f(" * string(x) * ") returned non-finite value " * string(y))
1240*412f47f9SXin Li        end
1241*412f47f9SXin Li        return y
1242*412f47f9SXin Li    end
1243*412f47f9SXin Li
1244*412f47f9SXin Li    if nargs == 6
1245*412f47f9SXin Li        # Wrap the user-provided weight function similarly.
1246*412f47f9SXin Li        w = eval(Meta.parse("(x,y) -> " * argwords[6]))
1247*412f47f9SXin Li        function wrapped_weight(x,y)
1248*412f47f9SXin Li            ww = run(w,x,y)
1249*412f47f9SXin Li            if !isfinite(ww)
1250*412f47f9SXin Li                error("w(" * string(x) * "," * string(y) *
1251*412f47f9SXin Li                      ") returned non-finite value " * string(ww))
1252*412f47f9SXin Li            end
1253*412f47f9SXin Li            return ww
1254*412f47f9SXin Li        end
1255*412f47f9SXin Li        weight = wrapped_weight
1256*412f47f9SXin Li    else
1257*412f47f9SXin Li        weight = (x,y)->BigFloat(1)
1258*412f47f9SXin Li    end
1259*412f47f9SXin Li
1260*412f47f9SXin Li    (nc, dc, e, extrema) = ratfn_minimax(func, (lo, hi), n, d, weight)
1261*412f47f9SXin Li    if array_format
1262*412f47f9SXin Li        if d == 0
1263*412f47f9SXin Li            functext = join([string(x)*",\n" for x=nc],"")
1264*412f47f9SXin Li        else
1265*412f47f9SXin Li            # It's unclear how you should best format an array of
1266*412f47f9SXin Li            # coefficients for a rational function, so I'll leave
1267*412f47f9SXin Li            # implementing this option until I have a use case.
1268*412f47f9SXin Li            error("--array unsupported for rational functions")
1269*412f47f9SXin Li        end
1270*412f47f9SXin Li    else
1271*412f47f9SXin Li        functext = ratfn_to_string(nc, dc) * "\n"
1272*412f47f9SXin Li    end
1273*412f47f9SXin Li    if full_output
1274*412f47f9SXin Li        # Print everything you might want to know about the function
1275*412f47f9SXin Li        println("extrema = ", format_xylist(extrema))
1276*412f47f9SXin Li        println("maxerror = ", string(e))
1277*412f47f9SXin Li        if length(dc) > 1
1278*412f47f9SXin Li            println("wellconditioning_numerator = ",
1279*412f47f9SXin Li                    string(wellcond(nc, lo, hi)))
1280*412f47f9SXin Li            println("wellconditioning_denominator = ",
1281*412f47f9SXin Li                    string(wellcond(dc, lo, hi)))
1282*412f47f9SXin Li        else
1283*412f47f9SXin Li            println("wellconditioning = ", string(wellcond(nc, lo, hi)))
1284*412f47f9SXin Li        end
1285*412f47f9SXin Li        print("function = ", functext)
1286*412f47f9SXin Li    else
1287*412f47f9SXin Li        # Just print the text people will want to paste into their code
1288*412f47f9SXin Li        print(functext)
1289*412f47f9SXin Li    end
1290*412f47f9SXin Liend
1291*412f47f9SXin Li
1292*412f47f9SXin Li# ----------------------------------------------------------------------
1293*412f47f9SXin Li# Top-level code: parse the argument list and decide what to do.
1294*412f47f9SXin Li
1295*412f47f9SXin Liwhat_to_do = main
1296*412f47f9SXin Li
1297*412f47f9SXin Lidoing_opts = true
1298*412f47f9SXin Liargwords = array1d(String, 0)
1299*412f47f9SXin Lifor arg = ARGS
1300*412f47f9SXin Li    global doing_opts, what_to_do, argwords
1301*412f47f9SXin Li    global full_output, array_format, xvarname, floatsuffix, epsbits
1302*412f47f9SXin Li    if doing_opts && startswith(arg, "-")
1303*412f47f9SXin Li        if arg == "--"
1304*412f47f9SXin Li            doing_opts = false
1305*412f47f9SXin Li        elseif arg == "--help"
1306*412f47f9SXin Li            what_to_do = help
1307*412f47f9SXin Li        elseif arg == "--test"
1308*412f47f9SXin Li            what_to_do = test
1309*412f47f9SXin Li        elseif arg == "--full"
1310*412f47f9SXin Li            full_output = true
1311*412f47f9SXin Li        elseif arg == "--array"
1312*412f47f9SXin Li            array_format = true
1313*412f47f9SXin Li        elseif startswith(arg, "--debug=")
1314*412f47f9SXin Li            enable_debug(arg[length("--debug=")+1:end])
1315*412f47f9SXin Li        elseif startswith(arg, "--variable=")
1316*412f47f9SXin Li            xvarname = arg[length("--variable=")+1:end]
1317*412f47f9SXin Li        elseif startswith(arg, "--suffix=")
1318*412f47f9SXin Li            floatsuffix = arg[length("--suffix=")+1:end]
1319*412f47f9SXin Li        elseif startswith(arg, "--bits=")
1320*412f47f9SXin Li            epsbits = parse(Int,arg[length("--bits=")+1:end])
1321*412f47f9SXin Li        elseif startswith(arg, "--bigfloatbits=")
1322*412f47f9SXin Li            set_bigfloat_precision(
1323*412f47f9SXin Li                parse(Int,arg[length("--bigfloatbits=")+1:end]))
1324*412f47f9SXin Li        elseif startswith(arg, "--pre=")
1325*412f47f9SXin Li            push!(preliminary_commands, arg[length("--pre=")+1:end])
1326*412f47f9SXin Li        else
1327*412f47f9SXin Li            error("unrecognised option: ", arg)
1328*412f47f9SXin Li        end
1329*412f47f9SXin Li    else
1330*412f47f9SXin Li        push!(argwords, arg)
1331*412f47f9SXin Li    end
1332*412f47f9SXin Liend
1333*412f47f9SXin Li
1334*412f47f9SXin Liwhat_to_do()
1335