1*5ddc57e5SXin Li /*
2*5ddc57e5SXin Li * Library: lmfit (Levenberg-Marquardt least squares fitting)
3*5ddc57e5SXin Li *
4*5ddc57e5SXin Li * File: lmmin.c
5*5ddc57e5SXin Li *
6*5ddc57e5SXin Li * Contents: Levenberg-Marquardt minimization.
7*5ddc57e5SXin Li *
8*5ddc57e5SXin Li * Copyright: MINPACK authors, The University of Chikago (1980-1999)
9*5ddc57e5SXin Li * Joachim Wuttke, Forschungszentrum Juelich GmbH (2004-2013)
10*5ddc57e5SXin Li *
11*5ddc57e5SXin Li * License: see ../COPYING (FreeBSD)
12*5ddc57e5SXin Li *
13*5ddc57e5SXin Li * Homepage: apps.jcns.fz-juelich.de/lmfit
14*5ddc57e5SXin Li */
15*5ddc57e5SXin Li
16*5ddc57e5SXin Li #include <assert.h>
17*5ddc57e5SXin Li #include <stdlib.h>
18*5ddc57e5SXin Li #include <stdio.h>
19*5ddc57e5SXin Li #include <math.h>
20*5ddc57e5SXin Li #include <float.h>
21*5ddc57e5SXin Li #include "lmmin.h"
22*5ddc57e5SXin Li
23*5ddc57e5SXin Li #define MIN(a, b) (((a) <= (b)) ? (a) : (b))
24*5ddc57e5SXin Li #define MAX(a, b) (((a) >= (b)) ? (a) : (b))
25*5ddc57e5SXin Li #define SQR(x) (x) * (x)
26*5ddc57e5SXin Li
27*5ddc57e5SXin Li /* Declare functions that do the heavy numerics.
28*5ddc57e5SXin Li Implementions are in this source file, below lmmin.
29*5ddc57e5SXin Li Dependences: lmmin calls lmpar, which calls qrfac and qrsolv. */
30*5ddc57e5SXin Li void lm_lmpar(const int n, double* r, const int ldr, const int* Pivot,
31*5ddc57e5SXin Li const double* diag, const double* qtb, const double delta,
32*5ddc57e5SXin Li double* par, double* x, double* Sdiag, double* aux, double* xdi);
33*5ddc57e5SXin Li void lm_qrfac(const int m, const int n, double* A, int* Pivot, double* Rdiag,
34*5ddc57e5SXin Li double* Acnorm, double* W);
35*5ddc57e5SXin Li void lm_qrsolv(const int n, double* r, const int ldr, const int* Pivot,
36*5ddc57e5SXin Li const double* diag, const double* qtb, double* x,
37*5ddc57e5SXin Li double* Sdiag, double* W);
38*5ddc57e5SXin Li
39*5ddc57e5SXin Li /******************************************************************************/
40*5ddc57e5SXin Li /* Numeric constants */
41*5ddc57e5SXin Li /******************************************************************************/
42*5ddc57e5SXin Li
43*5ddc57e5SXin Li /* Set machine-dependent constants to values from float.h. */
44*5ddc57e5SXin Li #define LM_MACHEP DBL_EPSILON /* resolution of arithmetic */
45*5ddc57e5SXin Li #define LM_DWARF DBL_MIN /* smallest nonzero number */
46*5ddc57e5SXin Li #define LM_SQRT_DWARF sqrt(DBL_MIN) /* square should not underflow */
47*5ddc57e5SXin Li #define LM_SQRT_GIANT sqrt(DBL_MAX) /* square should not overflow */
48*5ddc57e5SXin Li #define LM_USERTOL 30 * LM_MACHEP /* users are recommended to require this */
49*5ddc57e5SXin Li
50*5ddc57e5SXin Li /* If the above values do not work, the following seem good for an x86:
51*5ddc57e5SXin Li LM_MACHEP .555e-16
52*5ddc57e5SXin Li LM_DWARF 9.9e-324
53*5ddc57e5SXin Li LM_SQRT_DWARF 1.e-160
54*5ddc57e5SXin Li LM_SQRT_GIANT 1.e150
55*5ddc57e5SXin Li LM_USER_TOL 1.e-14
56*5ddc57e5SXin Li The following values should work on any machine:
57*5ddc57e5SXin Li LM_MACHEP 1.2e-16
58*5ddc57e5SXin Li LM_DWARF 1.0e-38
59*5ddc57e5SXin Li LM_SQRT_DWARF 3.834e-20
60*5ddc57e5SXin Li LM_SQRT_GIANT 1.304e19
61*5ddc57e5SXin Li LM_USER_TOL 1.e-14
62*5ddc57e5SXin Li */
63*5ddc57e5SXin Li
64*5ddc57e5SXin Li /* Predefined control parameter sets (msgfile=NULL means stdout). */
65*5ddc57e5SXin Li const lm_control_struct lm_control_double = {
66*5ddc57e5SXin Li LM_USERTOL, LM_USERTOL, LM_USERTOL, LM_USERTOL,
67*5ddc57e5SXin Li 100., 100, 1, NULL, 0, -1, -1};
68*5ddc57e5SXin Li const lm_control_struct lm_control_float = {
69*5ddc57e5SXin Li 1.e-7, 1.e-7, 1.e-7, 1.e-7,
70*5ddc57e5SXin Li 100., 100, 1, NULL, 0, -1, -1};
71*5ddc57e5SXin Li
72*5ddc57e5SXin Li /******************************************************************************/
73*5ddc57e5SXin Li /* Message texts (indexed by status.info) */
74*5ddc57e5SXin Li /******************************************************************************/
75*5ddc57e5SXin Li
76*5ddc57e5SXin Li const char* lm_infmsg[] = {
77*5ddc57e5SXin Li "found zero (sum of squares below underflow limit)",
78*5ddc57e5SXin Li "converged (the relative error in the sum of squares is at most tol)",
79*5ddc57e5SXin Li "converged (the relative error of the parameter vector is at most tol)",
80*5ddc57e5SXin Li "converged (both errors are at most tol)",
81*5ddc57e5SXin Li "trapped (by degeneracy; increasing epsilon might help)",
82*5ddc57e5SXin Li "exhausted (number of function calls exceeding preset patience)",
83*5ddc57e5SXin Li "failed (ftol<tol: cannot reduce sum of squares any further)",
84*5ddc57e5SXin Li "failed (xtol<tol: cannot improve approximate solution any further)",
85*5ddc57e5SXin Li "failed (gtol<tol: cannot improve approximate solution any further)",
86*5ddc57e5SXin Li "crashed (not enough memory)",
87*5ddc57e5SXin Li "exploded (fatal coding error: improper input parameters)",
88*5ddc57e5SXin Li "stopped (break requested within function evaluation)",
89*5ddc57e5SXin Li "found nan (function value is not-a-number or infinite)"};
90*5ddc57e5SXin Li
91*5ddc57e5SXin Li const char* lm_shortmsg[] = {
92*5ddc57e5SXin Li "found zero",
93*5ddc57e5SXin Li "converged (f)",
94*5ddc57e5SXin Li "converged (p)",
95*5ddc57e5SXin Li "converged (2)",
96*5ddc57e5SXin Li "degenerate",
97*5ddc57e5SXin Li "call limit",
98*5ddc57e5SXin Li "failed (f)",
99*5ddc57e5SXin Li "failed (p)",
100*5ddc57e5SXin Li "failed (o)",
101*5ddc57e5SXin Li "no memory",
102*5ddc57e5SXin Li "invalid input",
103*5ddc57e5SXin Li "user break",
104*5ddc57e5SXin Li "found nan"};
105*5ddc57e5SXin Li
106*5ddc57e5SXin Li /******************************************************************************/
107*5ddc57e5SXin Li /* Monitoring auxiliaries. */
108*5ddc57e5SXin Li /******************************************************************************/
109*5ddc57e5SXin Li
lm_print_pars(int nout,const double * par,FILE * fout)110*5ddc57e5SXin Li void lm_print_pars(int nout, const double* par, FILE* fout)
111*5ddc57e5SXin Li {
112*5ddc57e5SXin Li int i;
113*5ddc57e5SXin Li for (i = 0; i < nout; ++i)
114*5ddc57e5SXin Li fprintf(fout, " %16.9g", par[i]);
115*5ddc57e5SXin Li fprintf(fout, "\n");
116*5ddc57e5SXin Li }
117*5ddc57e5SXin Li
118*5ddc57e5SXin Li /******************************************************************************/
119*5ddc57e5SXin Li /* lmmin (main minimization routine) */
120*5ddc57e5SXin Li /******************************************************************************/
121*5ddc57e5SXin Li
lmmin(const int n,double * x,const int m,const void * data,void (* evaluate)(const double * par,const int m_dat,const void * data,double * fvec,int * userbreak),const lm_control_struct * C,lm_status_struct * S)122*5ddc57e5SXin Li void lmmin(const int n, double* x, const int m, const void* data,
123*5ddc57e5SXin Li void (*evaluate)(const double* par, const int m_dat,
124*5ddc57e5SXin Li const void* data, double* fvec, int* userbreak),
125*5ddc57e5SXin Li const lm_control_struct* C, lm_status_struct* S)
126*5ddc57e5SXin Li {
127*5ddc57e5SXin Li int j, i;
128*5ddc57e5SXin Li double actred, dirder, fnorm, fnorm1, gnorm, pnorm, prered, ratio, step,
129*5ddc57e5SXin Li sum, temp, temp1, temp2, temp3;
130*5ddc57e5SXin Li
131*5ddc57e5SXin Li /*** Initialize internal variables. ***/
132*5ddc57e5SXin Li
133*5ddc57e5SXin Li int maxfev = C->patience * (n+1);
134*5ddc57e5SXin Li
135*5ddc57e5SXin Li int inner_success; /* flag for loop control */
136*5ddc57e5SXin Li double lmpar = 0; /* Levenberg-Marquardt parameter */
137*5ddc57e5SXin Li double delta = 0;
138*5ddc57e5SXin Li double xnorm = 0;
139*5ddc57e5SXin Li double eps = sqrt(MAX(C->epsilon, LM_MACHEP)); /* for forward differences */
140*5ddc57e5SXin Li
141*5ddc57e5SXin Li int nout = C->n_maxpri == -1 ? n : MIN(C->n_maxpri, n);
142*5ddc57e5SXin Li
143*5ddc57e5SXin Li /* Reinterpret C->msgfile=NULL as stdout (which is unavailable for
144*5ddc57e5SXin Li compile-time initialization of lm_control_double and similar). */
145*5ddc57e5SXin Li FILE* msgfile = C->msgfile ? C->msgfile : stdout;
146*5ddc57e5SXin Li
147*5ddc57e5SXin Li /*** Default status info; must be set before first return statement. ***/
148*5ddc57e5SXin Li
149*5ddc57e5SXin Li S->outcome = 0; /* status code */
150*5ddc57e5SXin Li S->userbreak = 0;
151*5ddc57e5SXin Li S->nfev = 0; /* function evaluation counter */
152*5ddc57e5SXin Li
153*5ddc57e5SXin Li /*** Check input parameters for errors. ***/
154*5ddc57e5SXin Li
155*5ddc57e5SXin Li if (n <= 0) {
156*5ddc57e5SXin Li fprintf(stderr, "lmmin: invalid number of parameters %i\n", n);
157*5ddc57e5SXin Li S->outcome = 10;
158*5ddc57e5SXin Li return;
159*5ddc57e5SXin Li }
160*5ddc57e5SXin Li if (m < n) {
161*5ddc57e5SXin Li fprintf(stderr, "lmmin: number of data points (%i) "
162*5ddc57e5SXin Li "smaller than number of parameters (%i)\n",
163*5ddc57e5SXin Li m, n);
164*5ddc57e5SXin Li S->outcome = 10;
165*5ddc57e5SXin Li return;
166*5ddc57e5SXin Li }
167*5ddc57e5SXin Li if (C->ftol < 0 || C->xtol < 0 || C->gtol < 0) {
168*5ddc57e5SXin Li fprintf(stderr,
169*5ddc57e5SXin Li "lmmin: negative tolerance (at least one of %g %g %g)\n",
170*5ddc57e5SXin Li C->ftol, C->xtol, C->gtol);
171*5ddc57e5SXin Li S->outcome = 10;
172*5ddc57e5SXin Li return;
173*5ddc57e5SXin Li }
174*5ddc57e5SXin Li if (maxfev <= 0) {
175*5ddc57e5SXin Li fprintf(stderr, "lmmin: nonpositive function evaluations limit %i\n",
176*5ddc57e5SXin Li maxfev);
177*5ddc57e5SXin Li S->outcome = 10;
178*5ddc57e5SXin Li return;
179*5ddc57e5SXin Li }
180*5ddc57e5SXin Li if (C->stepbound <= 0) {
181*5ddc57e5SXin Li fprintf(stderr, "lmmin: nonpositive stepbound %g\n", C->stepbound);
182*5ddc57e5SXin Li S->outcome = 10;
183*5ddc57e5SXin Li return;
184*5ddc57e5SXin Li }
185*5ddc57e5SXin Li if (C->scale_diag != 0 && C->scale_diag != 1) {
186*5ddc57e5SXin Li fprintf(stderr, "lmmin: logical variable scale_diag=%i, "
187*5ddc57e5SXin Li "should be 0 or 1\n",
188*5ddc57e5SXin Li C->scale_diag);
189*5ddc57e5SXin Li S->outcome = 10;
190*5ddc57e5SXin Li return;
191*5ddc57e5SXin Li }
192*5ddc57e5SXin Li
193*5ddc57e5SXin Li /*** Allocate work space. ***/
194*5ddc57e5SXin Li
195*5ddc57e5SXin Li /* Allocate total workspace with just one system call */
196*5ddc57e5SXin Li char* ws;
197*5ddc57e5SXin Li if ((ws = malloc((2*m + 5*n + m*n) * sizeof(double) +
198*5ddc57e5SXin Li n * sizeof(int))) == NULL) {
199*5ddc57e5SXin Li S->outcome = 9;
200*5ddc57e5SXin Li return;
201*5ddc57e5SXin Li }
202*5ddc57e5SXin Li
203*5ddc57e5SXin Li /* Assign workspace segments. */
204*5ddc57e5SXin Li char* pws = ws;
205*5ddc57e5SXin Li double* fvec = (double*)pws;
206*5ddc57e5SXin Li pws += m * sizeof(double) / sizeof(char);
207*5ddc57e5SXin Li double* diag = (double*)pws;
208*5ddc57e5SXin Li pws += n * sizeof(double) / sizeof(char);
209*5ddc57e5SXin Li double* qtf = (double*)pws;
210*5ddc57e5SXin Li pws += n * sizeof(double) / sizeof(char);
211*5ddc57e5SXin Li double* fjac = (double*)pws;
212*5ddc57e5SXin Li pws += n * m * sizeof(double) / sizeof(char);
213*5ddc57e5SXin Li double* wa1 = (double*)pws;
214*5ddc57e5SXin Li pws += n * sizeof(double) / sizeof(char);
215*5ddc57e5SXin Li double* wa2 = (double*)pws;
216*5ddc57e5SXin Li pws += n * sizeof(double) / sizeof(char);
217*5ddc57e5SXin Li double* wa3 = (double*)pws;
218*5ddc57e5SXin Li pws += n * sizeof(double) / sizeof(char);
219*5ddc57e5SXin Li double* wf = (double*)pws;
220*5ddc57e5SXin Li pws += m * sizeof(double) / sizeof(char);
221*5ddc57e5SXin Li int* Pivot = (int*)pws;
222*5ddc57e5SXin Li pws += n * sizeof(int) / sizeof(char);
223*5ddc57e5SXin Li
224*5ddc57e5SXin Li /* Initialize diag. */
225*5ddc57e5SXin Li if (!C->scale_diag)
226*5ddc57e5SXin Li for (j = 0; j < n; j++)
227*5ddc57e5SXin Li diag[j] = 1;
228*5ddc57e5SXin Li
229*5ddc57e5SXin Li /*** Evaluate function at starting point and calculate norm. ***/
230*5ddc57e5SXin Li
231*5ddc57e5SXin Li if (C->verbosity) {
232*5ddc57e5SXin Li fprintf(msgfile, "lmmin start ");
233*5ddc57e5SXin Li lm_print_pars(nout, x, msgfile);
234*5ddc57e5SXin Li }
235*5ddc57e5SXin Li (*evaluate)(x, m, data, fvec, &(S->userbreak));
236*5ddc57e5SXin Li if (C->verbosity > 4)
237*5ddc57e5SXin Li for (i = 0; i < m; ++i)
238*5ddc57e5SXin Li fprintf(msgfile, " fvec[%4i] = %18.8g\n", i, fvec[i]);
239*5ddc57e5SXin Li S->nfev = 1;
240*5ddc57e5SXin Li if (S->userbreak)
241*5ddc57e5SXin Li goto terminate;
242*5ddc57e5SXin Li fnorm = lm_enorm(m, fvec);
243*5ddc57e5SXin Li if (C->verbosity)
244*5ddc57e5SXin Li fprintf(msgfile, " fnorm = %18.8g\n", fnorm);
245*5ddc57e5SXin Li
246*5ddc57e5SXin Li if (!isfinite(fnorm)) {
247*5ddc57e5SXin Li S->outcome = 12; /* nan */
248*5ddc57e5SXin Li goto terminate;
249*5ddc57e5SXin Li } else if (fnorm <= LM_DWARF) {
250*5ddc57e5SXin Li S->outcome = 0; /* sum of squares almost zero, nothing to do */
251*5ddc57e5SXin Li goto terminate;
252*5ddc57e5SXin Li }
253*5ddc57e5SXin Li
254*5ddc57e5SXin Li /*** The outer loop: compute gradient, then descend. ***/
255*5ddc57e5SXin Li
256*5ddc57e5SXin Li for (int outer = 0;; ++outer) {
257*5ddc57e5SXin Li
258*5ddc57e5SXin Li /** Calculate the Jacobian. **/
259*5ddc57e5SXin Li for (j = 0; j < n; j++) {
260*5ddc57e5SXin Li temp = x[j];
261*5ddc57e5SXin Li step = MAX(eps * eps, eps * fabs(temp));
262*5ddc57e5SXin Li x[j] += step; /* replace temporarily */
263*5ddc57e5SXin Li (*evaluate)(x, m, data, wf, &(S->userbreak));
264*5ddc57e5SXin Li ++(S->nfev);
265*5ddc57e5SXin Li if (S->userbreak)
266*5ddc57e5SXin Li goto terminate;
267*5ddc57e5SXin Li for (i = 0; i < m; i++)
268*5ddc57e5SXin Li fjac[j*m+i] = (wf[i] - fvec[i]) / step;
269*5ddc57e5SXin Li x[j] = temp; /* restore */
270*5ddc57e5SXin Li }
271*5ddc57e5SXin Li if (C->verbosity >= 10) {
272*5ddc57e5SXin Li /* print the entire matrix */
273*5ddc57e5SXin Li printf("\nlmmin Jacobian\n");
274*5ddc57e5SXin Li for (i = 0; i < m; i++) {
275*5ddc57e5SXin Li printf(" ");
276*5ddc57e5SXin Li for (j = 0; j < n; j++)
277*5ddc57e5SXin Li printf("%.5e ", fjac[j*m+i]);
278*5ddc57e5SXin Li printf("\n");
279*5ddc57e5SXin Li }
280*5ddc57e5SXin Li }
281*5ddc57e5SXin Li
282*5ddc57e5SXin Li /** Compute the QR factorization of the Jacobian. **/
283*5ddc57e5SXin Li
284*5ddc57e5SXin Li /* fjac is an m by n array. The upper n by n submatrix of fjac is made
285*5ddc57e5SXin Li * to contain an upper triangular matrix R with diagonal elements of
286*5ddc57e5SXin Li * nonincreasing magnitude such that
287*5ddc57e5SXin Li *
288*5ddc57e5SXin Li * P^T*(J^T*J)*P = R^T*R
289*5ddc57e5SXin Li *
290*5ddc57e5SXin Li * (NOTE: ^T stands for matrix transposition),
291*5ddc57e5SXin Li *
292*5ddc57e5SXin Li * where P is a permutation matrix and J is the final calculated
293*5ddc57e5SXin Li * Jacobian. Column j of P is column Pivot(j) of the identity matrix.
294*5ddc57e5SXin Li * The lower trapezoidal part of fjac contains information generated
295*5ddc57e5SXin Li * during the computation of R.
296*5ddc57e5SXin Li *
297*5ddc57e5SXin Li * Pivot is an integer array of length n. It defines a permutation
298*5ddc57e5SXin Li * matrix P such that jac*P = Q*R, where jac is the final calculated
299*5ddc57e5SXin Li * Jacobian, Q is orthogonal (not stored), and R is upper triangular
300*5ddc57e5SXin Li * with diagonal elements of nonincreasing magnitude. Column j of P
301*5ddc57e5SXin Li * is column Pivot(j) of the identity matrix.
302*5ddc57e5SXin Li */
303*5ddc57e5SXin Li
304*5ddc57e5SXin Li lm_qrfac(m, n, fjac, Pivot, wa1, wa2, wa3);
305*5ddc57e5SXin Li /* return values are Pivot, wa1=rdiag, wa2=acnorm */
306*5ddc57e5SXin Li
307*5ddc57e5SXin Li /** Form Q^T * fvec, and store first n components in qtf. **/
308*5ddc57e5SXin Li for (i = 0; i < m; i++)
309*5ddc57e5SXin Li wf[i] = fvec[i];
310*5ddc57e5SXin Li
311*5ddc57e5SXin Li for (j = 0; j < n; j++) {
312*5ddc57e5SXin Li temp3 = fjac[j*m+j];
313*5ddc57e5SXin Li if (temp3 != 0) {
314*5ddc57e5SXin Li sum = 0;
315*5ddc57e5SXin Li for (i = j; i < m; i++)
316*5ddc57e5SXin Li sum += fjac[j*m+i] * wf[i];
317*5ddc57e5SXin Li temp = -sum / temp3;
318*5ddc57e5SXin Li for (i = j; i < m; i++)
319*5ddc57e5SXin Li wf[i] += fjac[j*m+i] * temp;
320*5ddc57e5SXin Li }
321*5ddc57e5SXin Li fjac[j*m+j] = wa1[j];
322*5ddc57e5SXin Li qtf[j] = wf[j];
323*5ddc57e5SXin Li }
324*5ddc57e5SXin Li
325*5ddc57e5SXin Li /** Compute norm of scaled gradient and detect degeneracy. **/
326*5ddc57e5SXin Li gnorm = 0;
327*5ddc57e5SXin Li for (j = 0; j < n; j++) {
328*5ddc57e5SXin Li if (wa2[Pivot[j]] == 0)
329*5ddc57e5SXin Li continue;
330*5ddc57e5SXin Li sum = 0;
331*5ddc57e5SXin Li for (i = 0; i <= j; i++)
332*5ddc57e5SXin Li sum += fjac[j*m+i] * qtf[i];
333*5ddc57e5SXin Li gnorm = MAX(gnorm, fabs(sum / wa2[Pivot[j]] / fnorm));
334*5ddc57e5SXin Li }
335*5ddc57e5SXin Li
336*5ddc57e5SXin Li if (gnorm <= C->gtol) {
337*5ddc57e5SXin Li S->outcome = 4;
338*5ddc57e5SXin Li goto terminate;
339*5ddc57e5SXin Li }
340*5ddc57e5SXin Li
341*5ddc57e5SXin Li /** Initialize or update diag and delta. **/
342*5ddc57e5SXin Li if (!outer) { /* first iteration only */
343*5ddc57e5SXin Li if (C->scale_diag) {
344*5ddc57e5SXin Li /* diag := norms of the columns of the initial Jacobian */
345*5ddc57e5SXin Li for (j = 0; j < n; j++)
346*5ddc57e5SXin Li diag[j] = wa2[j] ? wa2[j] : 1;
347*5ddc57e5SXin Li /* xnorm := || D x || */
348*5ddc57e5SXin Li for (j = 0; j < n; j++)
349*5ddc57e5SXin Li wa3[j] = diag[j] * x[j];
350*5ddc57e5SXin Li xnorm = lm_enorm(n, wa3);
351*5ddc57e5SXin Li if (C->verbosity >= 2) {
352*5ddc57e5SXin Li fprintf(msgfile, "lmmin diag ");
353*5ddc57e5SXin Li lm_print_pars(nout, x, msgfile); // xnorm
354*5ddc57e5SXin Li fprintf(msgfile, " xnorm = %18.8g\n", xnorm);
355*5ddc57e5SXin Li }
356*5ddc57e5SXin Li /* Only now print the header for the loop table. */
357*5ddc57e5SXin Li if (C->verbosity >= 3) {
358*5ddc57e5SXin Li fprintf(msgfile, " o i lmpar prered"
359*5ddc57e5SXin Li " ratio dirder delta"
360*5ddc57e5SXin Li " pnorm fnorm");
361*5ddc57e5SXin Li for (i = 0; i < nout; ++i)
362*5ddc57e5SXin Li fprintf(msgfile, " p%i", i);
363*5ddc57e5SXin Li fprintf(msgfile, "\n");
364*5ddc57e5SXin Li }
365*5ddc57e5SXin Li } else {
366*5ddc57e5SXin Li xnorm = lm_enorm(n, x);
367*5ddc57e5SXin Li }
368*5ddc57e5SXin Li if (!isfinite(xnorm)) {
369*5ddc57e5SXin Li S->outcome = 12; /* nan */
370*5ddc57e5SXin Li goto terminate;
371*5ddc57e5SXin Li }
372*5ddc57e5SXin Li /* Initialize the step bound delta. */
373*5ddc57e5SXin Li if (xnorm)
374*5ddc57e5SXin Li delta = C->stepbound * xnorm;
375*5ddc57e5SXin Li else
376*5ddc57e5SXin Li delta = C->stepbound;
377*5ddc57e5SXin Li } else {
378*5ddc57e5SXin Li if (C->scale_diag) {
379*5ddc57e5SXin Li for (j = 0; j < n; j++)
380*5ddc57e5SXin Li diag[j] = MAX(diag[j], wa2[j]);
381*5ddc57e5SXin Li }
382*5ddc57e5SXin Li }
383*5ddc57e5SXin Li
384*5ddc57e5SXin Li /** The inner loop. **/
385*5ddc57e5SXin Li int inner = 0;
386*5ddc57e5SXin Li do {
387*5ddc57e5SXin Li
388*5ddc57e5SXin Li /** Determine the Levenberg-Marquardt parameter. **/
389*5ddc57e5SXin Li lm_lmpar(n, fjac, m, Pivot, diag, qtf, delta, &lmpar,
390*5ddc57e5SXin Li wa1, wa2, wf, wa3);
391*5ddc57e5SXin Li /* used return values are fjac (partly), lmpar, wa1=x, wa3=diag*x */
392*5ddc57e5SXin Li
393*5ddc57e5SXin Li /* Predict scaled reduction. */
394*5ddc57e5SXin Li pnorm = lm_enorm(n, wa3);
395*5ddc57e5SXin Li if (!isfinite(pnorm)) {
396*5ddc57e5SXin Li S->outcome = 12; /* nan */
397*5ddc57e5SXin Li goto terminate;
398*5ddc57e5SXin Li }
399*5ddc57e5SXin Li temp2 = lmpar * SQR(pnorm / fnorm);
400*5ddc57e5SXin Li for (j = 0; j < n; j++) {
401*5ddc57e5SXin Li wa3[j] = 0;
402*5ddc57e5SXin Li for (i = 0; i <= j; i++)
403*5ddc57e5SXin Li wa3[i] -= fjac[j*m+i] * wa1[Pivot[j]];
404*5ddc57e5SXin Li }
405*5ddc57e5SXin Li temp1 = SQR(lm_enorm(n, wa3) / fnorm);
406*5ddc57e5SXin Li if (!isfinite(temp1)) {
407*5ddc57e5SXin Li S->outcome = 12; /* nan */
408*5ddc57e5SXin Li goto terminate;
409*5ddc57e5SXin Li }
410*5ddc57e5SXin Li prered = temp1 + 2*temp2;
411*5ddc57e5SXin Li dirder = -temp1 + temp2; /* scaled directional derivative */
412*5ddc57e5SXin Li
413*5ddc57e5SXin Li /* At first call, adjust the initial step bound. */
414*5ddc57e5SXin Li if (!outer && pnorm < delta)
415*5ddc57e5SXin Li delta = pnorm;
416*5ddc57e5SXin Li
417*5ddc57e5SXin Li /** Evaluate the function at x + p. **/
418*5ddc57e5SXin Li for (j = 0; j < n; j++)
419*5ddc57e5SXin Li wa2[j] = x[j] - wa1[j];
420*5ddc57e5SXin Li (*evaluate)(wa2, m, data, wf, &(S->userbreak));
421*5ddc57e5SXin Li ++(S->nfev);
422*5ddc57e5SXin Li if (S->userbreak)
423*5ddc57e5SXin Li goto terminate;
424*5ddc57e5SXin Li fnorm1 = lm_enorm(m, wf);
425*5ddc57e5SXin Li if (!isfinite(fnorm1)) {
426*5ddc57e5SXin Li S->outcome = 12; /* nan */
427*5ddc57e5SXin Li goto terminate;
428*5ddc57e5SXin Li }
429*5ddc57e5SXin Li
430*5ddc57e5SXin Li /** Evaluate the scaled reduction. **/
431*5ddc57e5SXin Li
432*5ddc57e5SXin Li /* Actual scaled reduction. */
433*5ddc57e5SXin Li actred = 1 - SQR(fnorm1 / fnorm);
434*5ddc57e5SXin Li
435*5ddc57e5SXin Li /* Ratio of actual to predicted reduction. */
436*5ddc57e5SXin Li ratio = prered ? actred / prered : 0;
437*5ddc57e5SXin Li
438*5ddc57e5SXin Li if (C->verbosity == 2) {
439*5ddc57e5SXin Li fprintf(msgfile, "lmmin (%i:%i) ", outer, inner);
440*5ddc57e5SXin Li lm_print_pars(nout, wa2, msgfile); // fnorm1,
441*5ddc57e5SXin Li } else if (C->verbosity >= 3) {
442*5ddc57e5SXin Li printf("%3i %2i %9.2g %9.2g %14.6g"
443*5ddc57e5SXin Li " %9.2g %10.3e %10.3e %21.15e",
444*5ddc57e5SXin Li outer, inner, lmpar, prered, ratio,
445*5ddc57e5SXin Li dirder, delta, pnorm, fnorm1);
446*5ddc57e5SXin Li for (i = 0; i < nout; ++i)
447*5ddc57e5SXin Li fprintf(msgfile, " %16.9g", wa2[i]);
448*5ddc57e5SXin Li fprintf(msgfile, "\n");
449*5ddc57e5SXin Li }
450*5ddc57e5SXin Li
451*5ddc57e5SXin Li /* Update the step bound. */
452*5ddc57e5SXin Li if (ratio <= 0.25) {
453*5ddc57e5SXin Li if (actred >= 0)
454*5ddc57e5SXin Li temp = 0.5;
455*5ddc57e5SXin Li else if (actred > -99) /* -99 = 1-1/0.1^2 */
456*5ddc57e5SXin Li temp = MAX(dirder / (2*dirder + actred), 0.1);
457*5ddc57e5SXin Li else
458*5ddc57e5SXin Li temp = 0.1;
459*5ddc57e5SXin Li delta = temp * MIN(delta, pnorm / 0.1);
460*5ddc57e5SXin Li lmpar /= temp;
461*5ddc57e5SXin Li } else if (ratio >= 0.75) {
462*5ddc57e5SXin Li delta = 2 * pnorm;
463*5ddc57e5SXin Li lmpar *= 0.5;
464*5ddc57e5SXin Li } else if (!lmpar) {
465*5ddc57e5SXin Li delta = 2 * pnorm;
466*5ddc57e5SXin Li }
467*5ddc57e5SXin Li
468*5ddc57e5SXin Li /** On success, update solution, and test for convergence. **/
469*5ddc57e5SXin Li
470*5ddc57e5SXin Li inner_success = ratio >= 1e-4;
471*5ddc57e5SXin Li if (inner_success) {
472*5ddc57e5SXin Li
473*5ddc57e5SXin Li /* Update x, fvec, and their norms. */
474*5ddc57e5SXin Li if (C->scale_diag) {
475*5ddc57e5SXin Li for (j = 0; j < n; j++) {
476*5ddc57e5SXin Li x[j] = wa2[j];
477*5ddc57e5SXin Li wa2[j] = diag[j] * x[j];
478*5ddc57e5SXin Li }
479*5ddc57e5SXin Li } else {
480*5ddc57e5SXin Li for (j = 0; j < n; j++)
481*5ddc57e5SXin Li x[j] = wa2[j];
482*5ddc57e5SXin Li }
483*5ddc57e5SXin Li for (i = 0; i < m; i++)
484*5ddc57e5SXin Li fvec[i] = wf[i];
485*5ddc57e5SXin Li xnorm = lm_enorm(n, wa2);
486*5ddc57e5SXin Li if (!isfinite(xnorm)) {
487*5ddc57e5SXin Li S->outcome = 12; /* nan */
488*5ddc57e5SXin Li goto terminate;
489*5ddc57e5SXin Li }
490*5ddc57e5SXin Li fnorm = fnorm1;
491*5ddc57e5SXin Li }
492*5ddc57e5SXin Li
493*5ddc57e5SXin Li /* Convergence tests. */
494*5ddc57e5SXin Li S->outcome = 0;
495*5ddc57e5SXin Li if (fnorm <= LM_DWARF)
496*5ddc57e5SXin Li goto terminate; /* success: sum of squares almost zero */
497*5ddc57e5SXin Li /* Test two criteria (both may be fulfilled). */
498*5ddc57e5SXin Li if (fabs(actred) <= C->ftol && prered <= C->ftol && ratio <= 2)
499*5ddc57e5SXin Li S->outcome = 1; /* success: x almost stable */
500*5ddc57e5SXin Li if (delta <= C->xtol * xnorm)
501*5ddc57e5SXin Li S->outcome += 2; /* success: sum of squares almost stable */
502*5ddc57e5SXin Li if (S->outcome != 0) {
503*5ddc57e5SXin Li goto terminate;
504*5ddc57e5SXin Li }
505*5ddc57e5SXin Li
506*5ddc57e5SXin Li /** Tests for termination and stringent tolerances. **/
507*5ddc57e5SXin Li if (S->nfev >= maxfev) {
508*5ddc57e5SXin Li S->outcome = 5;
509*5ddc57e5SXin Li goto terminate;
510*5ddc57e5SXin Li }
511*5ddc57e5SXin Li if (fabs(actred) <= LM_MACHEP && prered <= LM_MACHEP &&
512*5ddc57e5SXin Li ratio <= 2) {
513*5ddc57e5SXin Li S->outcome = 6;
514*5ddc57e5SXin Li goto terminate;
515*5ddc57e5SXin Li }
516*5ddc57e5SXin Li if (delta <= LM_MACHEP * xnorm) {
517*5ddc57e5SXin Li S->outcome = 7;
518*5ddc57e5SXin Li goto terminate;
519*5ddc57e5SXin Li }
520*5ddc57e5SXin Li if (gnorm <= LM_MACHEP) {
521*5ddc57e5SXin Li S->outcome = 8;
522*5ddc57e5SXin Li goto terminate;
523*5ddc57e5SXin Li }
524*5ddc57e5SXin Li
525*5ddc57e5SXin Li /** End of the inner loop. Repeat if iteration unsuccessful. **/
526*5ddc57e5SXin Li ++inner;
527*5ddc57e5SXin Li } while (!inner_success);
528*5ddc57e5SXin Li
529*5ddc57e5SXin Li }; /*** End of the outer loop. ***/
530*5ddc57e5SXin Li
531*5ddc57e5SXin Li terminate:
532*5ddc57e5SXin Li S->fnorm = lm_enorm(m, fvec);
533*5ddc57e5SXin Li if (C->verbosity >= 2)
534*5ddc57e5SXin Li printf("lmmin outcome (%i) xnorm %g ftol %g xtol %g\n", S->outcome,
535*5ddc57e5SXin Li xnorm, C->ftol, C->xtol);
536*5ddc57e5SXin Li if (C->verbosity & 1) {
537*5ddc57e5SXin Li fprintf(msgfile, "lmmin final ");
538*5ddc57e5SXin Li lm_print_pars(nout, x, msgfile); // S->fnorm,
539*5ddc57e5SXin Li fprintf(msgfile, " fnorm = %18.8g\n", S->fnorm);
540*5ddc57e5SXin Li }
541*5ddc57e5SXin Li if (S->userbreak) /* user-requested break */
542*5ddc57e5SXin Li S->outcome = 11;
543*5ddc57e5SXin Li
544*5ddc57e5SXin Li /*** Deallocate the workspace. ***/
545*5ddc57e5SXin Li free(ws);
546*5ddc57e5SXin Li
547*5ddc57e5SXin Li } /*** lmmin. ***/
548*5ddc57e5SXin Li
549*5ddc57e5SXin Li /******************************************************************************/
550*5ddc57e5SXin Li /* lm_lmpar (determine Levenberg-Marquardt parameter) */
551*5ddc57e5SXin Li /******************************************************************************/
552*5ddc57e5SXin Li
lm_lmpar(const int n,double * r,const int ldr,const int * Pivot,const double * diag,const double * qtb,const double delta,double * par,double * x,double * Sdiag,double * aux,double * xdi)553*5ddc57e5SXin Li void lm_lmpar(const int n, double* r, const int ldr, const int* Pivot,
554*5ddc57e5SXin Li const double* diag, const double* qtb, const double delta,
555*5ddc57e5SXin Li double* par, double* x, double* Sdiag, double* aux, double* xdi)
556*5ddc57e5SXin Li /* Given an m by n matrix A, an n by n nonsingular diagonal matrix D,
557*5ddc57e5SXin Li * an m-vector b, and a positive number delta, the problem is to
558*5ddc57e5SXin Li * determine a parameter value par such that if x solves the system
559*5ddc57e5SXin Li *
560*5ddc57e5SXin Li * A*x = b and sqrt(par)*D*x = 0
561*5ddc57e5SXin Li *
562*5ddc57e5SXin Li * in the least squares sense, and dxnorm is the Euclidean norm of D*x,
563*5ddc57e5SXin Li * then either par=0 and (dxnorm-delta) < 0.1*delta, or par>0 and
564*5ddc57e5SXin Li * abs(dxnorm-delta) < 0.1*delta.
565*5ddc57e5SXin Li *
566*5ddc57e5SXin Li * Using lm_qrsolv, this subroutine completes the solution of the
567*5ddc57e5SXin Li * problem if it is provided with the necessary information from the
568*5ddc57e5SXin Li * QR factorization, with column pivoting, of A. That is, if A*P = Q*R,
569*5ddc57e5SXin Li * where P is a permutation matrix, Q has orthogonal columns, and R is
570*5ddc57e5SXin Li * an upper triangular matrix with diagonal elements of nonincreasing
571*5ddc57e5SXin Li * magnitude, then lmpar expects the full upper triangle of R, the
572*5ddc57e5SXin Li * permutation matrix P, and the first n components of Q^T*b. On output
573*5ddc57e5SXin Li * lmpar also provides an upper triangular matrix S such that
574*5ddc57e5SXin Li *
575*5ddc57e5SXin Li * P^T*(A^T*A + par*D*D)*P = S^T*S.
576*5ddc57e5SXin Li *
577*5ddc57e5SXin Li * S is employed within lmpar and may be of separate interest.
578*5ddc57e5SXin Li *
579*5ddc57e5SXin Li * Only a few iterations are generally needed for convergence of the
580*5ddc57e5SXin Li * algorithm. If, however, the limit of 10 iterations is reached, then
581*5ddc57e5SXin Li * the output par will contain the best value obtained so far.
582*5ddc57e5SXin Li *
583*5ddc57e5SXin Li * Parameters:
584*5ddc57e5SXin Li *
585*5ddc57e5SXin Li * n is a positive integer INPUT variable set to the order of r.
586*5ddc57e5SXin Li *
587*5ddc57e5SXin Li * r is an n by n array. On INPUT the full upper triangle must contain
588*5ddc57e5SXin Li * the full upper triangle of the matrix R. On OUTPUT the full upper
589*5ddc57e5SXin Li * triangle is unaltered, and the strict lower triangle contains the
590*5ddc57e5SXin Li * strict upper triangle (transposed) of the upper triangular matrix S.
591*5ddc57e5SXin Li *
592*5ddc57e5SXin Li * ldr is a positive integer INPUT variable not less than n which
593*5ddc57e5SXin Li * specifies the leading dimension of the array R.
594*5ddc57e5SXin Li *
595*5ddc57e5SXin Li * Pivot is an integer INPUT array of length n which defines the
596*5ddc57e5SXin Li * permutation matrix P such that A*P = Q*R. Column j of P is column
597*5ddc57e5SXin Li * Pivot(j) of the identity matrix.
598*5ddc57e5SXin Li *
599*5ddc57e5SXin Li * diag is an INPUT array of length n which must contain the diagonal
600*5ddc57e5SXin Li * elements of the matrix D.
601*5ddc57e5SXin Li *
602*5ddc57e5SXin Li * qtb is an INPUT array of length n which must contain the first
603*5ddc57e5SXin Li * n elements of the vector Q^T*b.
604*5ddc57e5SXin Li *
605*5ddc57e5SXin Li * delta is a positive INPUT variable which specifies an upper bound
606*5ddc57e5SXin Li * on the Euclidean norm of D*x.
607*5ddc57e5SXin Li *
608*5ddc57e5SXin Li * par is a nonnegative variable. On INPUT par contains an initial
609*5ddc57e5SXin Li * estimate of the Levenberg-Marquardt parameter. On OUTPUT par
610*5ddc57e5SXin Li * contains the final estimate.
611*5ddc57e5SXin Li *
612*5ddc57e5SXin Li * x is an OUTPUT array of length n which contains the least-squares
613*5ddc57e5SXin Li * solution of the system A*x = b, sqrt(par)*D*x = 0, for the output par.
614*5ddc57e5SXin Li *
615*5ddc57e5SXin Li * Sdiag is an array of length n needed as workspace; on OUTPUT it
616*5ddc57e5SXin Li * contains the diagonal elements of the upper triangular matrix S.
617*5ddc57e5SXin Li *
618*5ddc57e5SXin Li * aux is a multi-purpose work array of length n.
619*5ddc57e5SXin Li *
620*5ddc57e5SXin Li * xdi is a work array of length n. On OUTPUT: diag[j] * x[j].
621*5ddc57e5SXin Li *
622*5ddc57e5SXin Li */
623*5ddc57e5SXin Li {
624*5ddc57e5SXin Li int i, iter, j, nsing;
625*5ddc57e5SXin Li double dxnorm, fp, fp_old, gnorm, parc, parl, paru;
626*5ddc57e5SXin Li double sum, temp;
627*5ddc57e5SXin Li static double p1 = 0.1;
628*5ddc57e5SXin Li
629*5ddc57e5SXin Li /*** Compute and store in x the Gauss-Newton direction. If the Jacobian
630*5ddc57e5SXin Li is rank-deficient, obtain a least-squares solution. ***/
631*5ddc57e5SXin Li
632*5ddc57e5SXin Li nsing = n;
633*5ddc57e5SXin Li for (j = 0; j < n; j++) {
634*5ddc57e5SXin Li aux[j] = qtb[j];
635*5ddc57e5SXin Li if (r[j*ldr+j] == 0 && nsing == n)
636*5ddc57e5SXin Li nsing = j;
637*5ddc57e5SXin Li if (nsing < n)
638*5ddc57e5SXin Li aux[j] = 0;
639*5ddc57e5SXin Li }
640*5ddc57e5SXin Li for (j = nsing-1; j >= 0; j--) {
641*5ddc57e5SXin Li aux[j] = aux[j] / r[j+ldr*j];
642*5ddc57e5SXin Li temp = aux[j];
643*5ddc57e5SXin Li for (i = 0; i < j; i++)
644*5ddc57e5SXin Li aux[i] -= r[j*ldr+i] * temp;
645*5ddc57e5SXin Li }
646*5ddc57e5SXin Li
647*5ddc57e5SXin Li for (j = 0; j < n; j++)
648*5ddc57e5SXin Li x[Pivot[j]] = aux[j];
649*5ddc57e5SXin Li
650*5ddc57e5SXin Li /*** Initialize the iteration counter, evaluate the function at the origin,
651*5ddc57e5SXin Li and test for acceptance of the Gauss-Newton direction. ***/
652*5ddc57e5SXin Li
653*5ddc57e5SXin Li for (j = 0; j < n; j++)
654*5ddc57e5SXin Li xdi[j] = diag[j] * x[j];
655*5ddc57e5SXin Li dxnorm = lm_enorm(n, xdi);
656*5ddc57e5SXin Li fp = dxnorm - delta;
657*5ddc57e5SXin Li if (fp <= p1 * delta) {
658*5ddc57e5SXin Li #ifdef LMFIT_DEBUG_MESSAGES
659*5ddc57e5SXin Li printf("debug lmpar nsing=%d, n=%d, terminate[fp<=p1*del]\n", nsing, n);
660*5ddc57e5SXin Li #endif
661*5ddc57e5SXin Li *par = 0;
662*5ddc57e5SXin Li return;
663*5ddc57e5SXin Li }
664*5ddc57e5SXin Li
665*5ddc57e5SXin Li /*** If the Jacobian is not rank deficient, the Newton step provides a
666*5ddc57e5SXin Li lower bound, parl, for the zero of the function. Otherwise set this
667*5ddc57e5SXin Li bound to zero. ***/
668*5ddc57e5SXin Li
669*5ddc57e5SXin Li parl = 0;
670*5ddc57e5SXin Li if (nsing >= n) {
671*5ddc57e5SXin Li for (j = 0; j < n; j++)
672*5ddc57e5SXin Li aux[j] = diag[Pivot[j]] * xdi[Pivot[j]] / dxnorm;
673*5ddc57e5SXin Li
674*5ddc57e5SXin Li for (j = 0; j < n; j++) {
675*5ddc57e5SXin Li sum = 0;
676*5ddc57e5SXin Li for (i = 0; i < j; i++)
677*5ddc57e5SXin Li sum += r[j*ldr+i] * aux[i];
678*5ddc57e5SXin Li aux[j] = (aux[j] - sum) / r[j+ldr*j];
679*5ddc57e5SXin Li }
680*5ddc57e5SXin Li temp = lm_enorm(n, aux);
681*5ddc57e5SXin Li parl = fp / delta / temp / temp;
682*5ddc57e5SXin Li }
683*5ddc57e5SXin Li
684*5ddc57e5SXin Li /*** Calculate an upper bound, paru, for the zero of the function. ***/
685*5ddc57e5SXin Li
686*5ddc57e5SXin Li for (j = 0; j < n; j++) {
687*5ddc57e5SXin Li sum = 0;
688*5ddc57e5SXin Li for (i = 0; i <= j; i++)
689*5ddc57e5SXin Li sum += r[j*ldr+i] * qtb[i];
690*5ddc57e5SXin Li aux[j] = sum / diag[Pivot[j]];
691*5ddc57e5SXin Li }
692*5ddc57e5SXin Li gnorm = lm_enorm(n, aux);
693*5ddc57e5SXin Li paru = gnorm / delta;
694*5ddc57e5SXin Li if (paru == 0)
695*5ddc57e5SXin Li paru = LM_DWARF / MIN(delta, p1);
696*5ddc57e5SXin Li
697*5ddc57e5SXin Li /*** If the input par lies outside of the interval (parl,paru),
698*5ddc57e5SXin Li set par to the closer endpoint. ***/
699*5ddc57e5SXin Li
700*5ddc57e5SXin Li *par = MAX(*par, parl);
701*5ddc57e5SXin Li *par = MIN(*par, paru);
702*5ddc57e5SXin Li if (*par == 0)
703*5ddc57e5SXin Li *par = gnorm / dxnorm;
704*5ddc57e5SXin Li
705*5ddc57e5SXin Li /*** Iterate. ***/
706*5ddc57e5SXin Li
707*5ddc57e5SXin Li for (iter = 0;; iter++) {
708*5ddc57e5SXin Li
709*5ddc57e5SXin Li /** Evaluate the function at the current value of par. **/
710*5ddc57e5SXin Li if (*par == 0)
711*5ddc57e5SXin Li *par = MAX(LM_DWARF, 0.001 * paru);
712*5ddc57e5SXin Li temp = sqrt(*par);
713*5ddc57e5SXin Li for (j = 0; j < n; j++)
714*5ddc57e5SXin Li aux[j] = temp * diag[j];
715*5ddc57e5SXin Li
716*5ddc57e5SXin Li lm_qrsolv(n, r, ldr, Pivot, aux, qtb, x, Sdiag, xdi);
717*5ddc57e5SXin Li /* return values are r, x, Sdiag */
718*5ddc57e5SXin Li
719*5ddc57e5SXin Li for (j = 0; j < n; j++)
720*5ddc57e5SXin Li xdi[j] = diag[j] * x[j]; /* used as output */
721*5ddc57e5SXin Li dxnorm = lm_enorm(n, xdi);
722*5ddc57e5SXin Li fp_old = fp;
723*5ddc57e5SXin Li fp = dxnorm - delta;
724*5ddc57e5SXin Li
725*5ddc57e5SXin Li /** If the function is small enough, accept the current value
726*5ddc57e5SXin Li of par. Also test for the exceptional cases where parl
727*5ddc57e5SXin Li is zero or the number of iterations has reached 10. **/
728*5ddc57e5SXin Li if (fabs(fp) <= p1 * delta ||
729*5ddc57e5SXin Li (parl == 0 && fp <= fp_old && fp_old < 0) || iter == 10) {
730*5ddc57e5SXin Li #ifdef LMFIT_DEBUG_MESSAGES
731*5ddc57e5SXin Li printf("debug lmpar nsing=%d, iter=%d, "
732*5ddc57e5SXin Li "par=%.4e [%.4e %.4e], delta=%.4e, fp=%.4e\n",
733*5ddc57e5SXin Li nsing, iter, *par, parl, paru, delta, fp);
734*5ddc57e5SXin Li #endif
735*5ddc57e5SXin Li break; /* the only exit from the iteration. */
736*5ddc57e5SXin Li }
737*5ddc57e5SXin Li
738*5ddc57e5SXin Li /** Compute the Newton correction. **/
739*5ddc57e5SXin Li for (j = 0; j < n; j++)
740*5ddc57e5SXin Li aux[j] = diag[Pivot[j]] * xdi[Pivot[j]] / dxnorm;
741*5ddc57e5SXin Li
742*5ddc57e5SXin Li for (j = 0; j < n; j++) {
743*5ddc57e5SXin Li aux[j] = aux[j] / Sdiag[j];
744*5ddc57e5SXin Li for (i = j+1; i < n; i++)
745*5ddc57e5SXin Li aux[i] -= r[j*ldr+i] * aux[j];
746*5ddc57e5SXin Li }
747*5ddc57e5SXin Li temp = lm_enorm(n, aux);
748*5ddc57e5SXin Li parc = fp / delta / temp / temp;
749*5ddc57e5SXin Li
750*5ddc57e5SXin Li /** Depending on the sign of the function, update parl or paru. **/
751*5ddc57e5SXin Li if (fp > 0)
752*5ddc57e5SXin Li parl = MAX(parl, *par);
753*5ddc57e5SXin Li else /* fp < 0 [the case fp==0 is precluded by the break condition] */
754*5ddc57e5SXin Li paru = MIN(paru, *par);
755*5ddc57e5SXin Li
756*5ddc57e5SXin Li /** Compute an improved estimate for par. **/
757*5ddc57e5SXin Li *par = MAX(parl, *par + parc);
758*5ddc57e5SXin Li }
759*5ddc57e5SXin Li
760*5ddc57e5SXin Li } /*** lm_lmpar. ***/
761*5ddc57e5SXin Li
762*5ddc57e5SXin Li /******************************************************************************/
763*5ddc57e5SXin Li /* lm_qrfac (QR factorization, from lapack) */
764*5ddc57e5SXin Li /******************************************************************************/
765*5ddc57e5SXin Li
lm_qrfac(const int m,const int n,double * A,int * Pivot,double * Rdiag,double * Acnorm,double * W)766*5ddc57e5SXin Li void lm_qrfac(const int m, const int n, double* A, int* Pivot, double* Rdiag,
767*5ddc57e5SXin Li double* Acnorm, double* W)
768*5ddc57e5SXin Li /*
769*5ddc57e5SXin Li * This subroutine uses Householder transformations with column pivoting
770*5ddc57e5SXin Li * to compute a QR factorization of the m by n matrix A. That is, qrfac
771*5ddc57e5SXin Li * determines an orthogonal matrix Q, a permutation matrix P, and an
772*5ddc57e5SXin Li * upper trapezoidal matrix R with diagonal elements of nonincreasing
773*5ddc57e5SXin Li * magnitude, such that A*P = Q*R. The Householder transformation for
774*5ddc57e5SXin Li * column k, k = 1,2,...,n, is of the form
775*5ddc57e5SXin Li *
776*5ddc57e5SXin Li * I - 2*w*wT/|w|^2
777*5ddc57e5SXin Li *
778*5ddc57e5SXin Li * where w has zeroes in the first k-1 positions.
779*5ddc57e5SXin Li *
780*5ddc57e5SXin Li * Parameters:
781*5ddc57e5SXin Li *
782*5ddc57e5SXin Li * m is an INPUT parameter set to the number of rows of A.
783*5ddc57e5SXin Li *
784*5ddc57e5SXin Li * n is an INPUT parameter set to the number of columns of A.
785*5ddc57e5SXin Li *
786*5ddc57e5SXin Li * A is an m by n array. On INPUT, A contains the matrix for which the
787*5ddc57e5SXin Li * QR factorization is to be computed. On OUTPUT the strict upper
788*5ddc57e5SXin Li * trapezoidal part of A contains the strict upper trapezoidal part
789*5ddc57e5SXin Li * of R, and the lower trapezoidal part of A contains a factored form
790*5ddc57e5SXin Li * of Q (the non-trivial elements of the vectors w described above).
791*5ddc57e5SXin Li *
792*5ddc57e5SXin Li * Pivot is an integer OUTPUT array of length n that describes the
793*5ddc57e5SXin Li * permutation matrix P. Column j of P is column Pivot(j) of the
794*5ddc57e5SXin Li * identity matrix.
795*5ddc57e5SXin Li *
796*5ddc57e5SXin Li * Rdiag is an OUTPUT array of length n which contains the diagonal
797*5ddc57e5SXin Li * elements of R.
798*5ddc57e5SXin Li *
799*5ddc57e5SXin Li * Acnorm is an OUTPUT array of length n which contains the norms of
800*5ddc57e5SXin Li * the corresponding columns of the input matrix A. If this information
801*5ddc57e5SXin Li * is not needed, then Acnorm can share storage with Rdiag.
802*5ddc57e5SXin Li *
803*5ddc57e5SXin Li * W is a work array of length n.
804*5ddc57e5SXin Li *
805*5ddc57e5SXin Li */
806*5ddc57e5SXin Li {
807*5ddc57e5SXin Li int i, j, k, kmax;
808*5ddc57e5SXin Li double ajnorm, sum, temp;
809*5ddc57e5SXin Li
810*5ddc57e5SXin Li #ifdef LMFIT_DEBUG_MESSAGES
811*5ddc57e5SXin Li printf("debug qrfac\n");
812*5ddc57e5SXin Li #endif
813*5ddc57e5SXin Li
814*5ddc57e5SXin Li /** Compute initial column norms;
815*5ddc57e5SXin Li initialize Pivot with identity permutation. ***/
816*5ddc57e5SXin Li for (j = 0; j < n; j++) {
817*5ddc57e5SXin Li W[j] = Rdiag[j] = Acnorm[j] = lm_enorm(m, &A[j*m]);
818*5ddc57e5SXin Li Pivot[j] = j;
819*5ddc57e5SXin Li }
820*5ddc57e5SXin Li
821*5ddc57e5SXin Li /** Loop over columns of A. **/
822*5ddc57e5SXin Li assert(n <= m);
823*5ddc57e5SXin Li for (j = 0; j < n; j++) {
824*5ddc57e5SXin Li
825*5ddc57e5SXin Li /** Bring the column of largest norm into the pivot position. **/
826*5ddc57e5SXin Li kmax = j;
827*5ddc57e5SXin Li for (k = j+1; k < n; k++)
828*5ddc57e5SXin Li if (Rdiag[k] > Rdiag[kmax])
829*5ddc57e5SXin Li kmax = k;
830*5ddc57e5SXin Li
831*5ddc57e5SXin Li if (kmax != j) {
832*5ddc57e5SXin Li /* Swap columns j and kmax. */
833*5ddc57e5SXin Li k = Pivot[j];
834*5ddc57e5SXin Li Pivot[j] = Pivot[kmax];
835*5ddc57e5SXin Li Pivot[kmax] = k;
836*5ddc57e5SXin Li for (i = 0; i < m; i++) {
837*5ddc57e5SXin Li temp = A[j*m+i];
838*5ddc57e5SXin Li A[j*m+i] = A[kmax*m+i];
839*5ddc57e5SXin Li A[kmax*m+i] = temp;
840*5ddc57e5SXin Li }
841*5ddc57e5SXin Li /* Half-swap: Rdiag[j], W[j] won't be needed any further. */
842*5ddc57e5SXin Li Rdiag[kmax] = Rdiag[j];
843*5ddc57e5SXin Li W[kmax] = W[j];
844*5ddc57e5SXin Li }
845*5ddc57e5SXin Li
846*5ddc57e5SXin Li /** Compute the Householder reflection vector w_j to reduce the
847*5ddc57e5SXin Li j-th column of A to a multiple of the j-th unit vector. **/
848*5ddc57e5SXin Li ajnorm = lm_enorm(m-j, &A[j*m+j]);
849*5ddc57e5SXin Li if (ajnorm == 0) {
850*5ddc57e5SXin Li Rdiag[j] = 0;
851*5ddc57e5SXin Li continue;
852*5ddc57e5SXin Li }
853*5ddc57e5SXin Li
854*5ddc57e5SXin Li /* Let the partial column vector A[j][j:] contain w_j := e_j+-a_j/|a_j|,
855*5ddc57e5SXin Li where the sign +- is chosen to avoid cancellation in w_jj. */
856*5ddc57e5SXin Li if (A[j*m+j] < 0)
857*5ddc57e5SXin Li ajnorm = -ajnorm;
858*5ddc57e5SXin Li for (i = j; i < m; i++)
859*5ddc57e5SXin Li A[j*m+i] /= ajnorm;
860*5ddc57e5SXin Li A[j*m+j] += 1;
861*5ddc57e5SXin Li
862*5ddc57e5SXin Li /** Apply the Householder transformation U_w := 1 - 2*w_j.w_j/|w_j|^2
863*5ddc57e5SXin Li to the remaining columns, and update the norms. **/
864*5ddc57e5SXin Li for (k = j+1; k < n; k++) {
865*5ddc57e5SXin Li /* Compute scalar product w_j * a_j. */
866*5ddc57e5SXin Li sum = 0;
867*5ddc57e5SXin Li for (i = j; i < m; i++)
868*5ddc57e5SXin Li sum += A[j*m+i] * A[k*m+i];
869*5ddc57e5SXin Li
870*5ddc57e5SXin Li /* Normalization is simplified by the coincidence |w_j|^2=2w_jj. */
871*5ddc57e5SXin Li temp = sum / A[j*m+j];
872*5ddc57e5SXin Li
873*5ddc57e5SXin Li /* Carry out transform U_w_j * a_k. */
874*5ddc57e5SXin Li for (i = j; i < m; i++)
875*5ddc57e5SXin Li A[k*m+i] -= temp * A[j*m+i];
876*5ddc57e5SXin Li
877*5ddc57e5SXin Li /* No idea what happens here. */
878*5ddc57e5SXin Li if (Rdiag[k] != 0) {
879*5ddc57e5SXin Li temp = A[m*k+j] / Rdiag[k];
880*5ddc57e5SXin Li if (fabs(temp) < 1) {
881*5ddc57e5SXin Li Rdiag[k] *= sqrt(1 - SQR(temp));
882*5ddc57e5SXin Li temp = Rdiag[k] / W[k];
883*5ddc57e5SXin Li } else
884*5ddc57e5SXin Li temp = 0;
885*5ddc57e5SXin Li if (temp == 0 || 0.05 * SQR(temp) <= LM_MACHEP) {
886*5ddc57e5SXin Li Rdiag[k] = lm_enorm(m-j-1, &A[m*k+j+1]);
887*5ddc57e5SXin Li W[k] = Rdiag[k];
888*5ddc57e5SXin Li }
889*5ddc57e5SXin Li }
890*5ddc57e5SXin Li }
891*5ddc57e5SXin Li
892*5ddc57e5SXin Li Rdiag[j] = -ajnorm;
893*5ddc57e5SXin Li }
894*5ddc57e5SXin Li } /*** lm_qrfac. ***/
895*5ddc57e5SXin Li
896*5ddc57e5SXin Li /******************************************************************************/
897*5ddc57e5SXin Li /* lm_qrsolv (linear least-squares) */
898*5ddc57e5SXin Li /******************************************************************************/
899*5ddc57e5SXin Li
lm_qrsolv(const int n,double * r,const int ldr,const int * Pivot,const double * diag,const double * qtb,double * x,double * Sdiag,double * W)900*5ddc57e5SXin Li void lm_qrsolv(const int n, double* r, const int ldr, const int* Pivot,
901*5ddc57e5SXin Li const double* diag, const double* qtb, double* x,
902*5ddc57e5SXin Li double* Sdiag, double* W)
903*5ddc57e5SXin Li /*
904*5ddc57e5SXin Li * Given an m by n matrix A, an n by n diagonal matrix D, and an
905*5ddc57e5SXin Li * m-vector b, the problem is to determine an x which solves the
906*5ddc57e5SXin Li * system
907*5ddc57e5SXin Li *
908*5ddc57e5SXin Li * A*x = b and D*x = 0
909*5ddc57e5SXin Li *
910*5ddc57e5SXin Li * in the least squares sense.
911*5ddc57e5SXin Li *
912*5ddc57e5SXin Li * This subroutine completes the solution of the problem if it is
913*5ddc57e5SXin Li * provided with the necessary information from the QR factorization,
914*5ddc57e5SXin Li * with column pivoting, of A. That is, if A*P = Q*R, where P is a
915*5ddc57e5SXin Li * permutation matrix, Q has orthogonal columns, and R is an upper
916*5ddc57e5SXin Li * triangular matrix with diagonal elements of nonincreasing magnitude,
917*5ddc57e5SXin Li * then qrsolv expects the full upper triangle of R, the permutation
918*5ddc57e5SXin Li * matrix P, and the first n components of Q^T*b. The system
919*5ddc57e5SXin Li * A*x = b, D*x = 0, is then equivalent to
920*5ddc57e5SXin Li *
921*5ddc57e5SXin Li * R*z = Q^T*b, P^T*D*P*z = 0,
922*5ddc57e5SXin Li *
923*5ddc57e5SXin Li * where x = P*z. If this system does not have full rank, then a least
924*5ddc57e5SXin Li * squares solution is obtained. On output qrsolv also provides an upper
925*5ddc57e5SXin Li * triangular matrix S such that
926*5ddc57e5SXin Li *
927*5ddc57e5SXin Li * P^T*(A^T*A + D*D)*P = S^T*S.
928*5ddc57e5SXin Li *
929*5ddc57e5SXin Li * S is computed within qrsolv and may be of separate interest.
930*5ddc57e5SXin Li *
931*5ddc57e5SXin Li * Parameters:
932*5ddc57e5SXin Li *
933*5ddc57e5SXin Li * n is a positive integer INPUT variable set to the order of R.
934*5ddc57e5SXin Li *
935*5ddc57e5SXin Li * r is an n by n array. On INPUT the full upper triangle must contain
936*5ddc57e5SXin Li * the full upper triangle of the matrix R. On OUTPUT the full upper
937*5ddc57e5SXin Li * triangle is unaltered, and the strict lower triangle contains the
938*5ddc57e5SXin Li * strict upper triangle (transposed) of the upper triangular matrix S.
939*5ddc57e5SXin Li *
940*5ddc57e5SXin Li * ldr is a positive integer INPUT variable not less than n which
941*5ddc57e5SXin Li * specifies the leading dimension of the array R.
942*5ddc57e5SXin Li *
943*5ddc57e5SXin Li * Pivot is an integer INPUT array of length n which defines the
944*5ddc57e5SXin Li * permutation matrix P such that A*P = Q*R. Column j of P is column
945*5ddc57e5SXin Li * Pivot(j) of the identity matrix.
946*5ddc57e5SXin Li *
947*5ddc57e5SXin Li * diag is an INPUT array of length n which must contain the diagonal
948*5ddc57e5SXin Li * elements of the matrix D.
949*5ddc57e5SXin Li *
950*5ddc57e5SXin Li * qtb is an INPUT array of length n which must contain the first
951*5ddc57e5SXin Li * n elements of the vector Q^T*b.
952*5ddc57e5SXin Li *
953*5ddc57e5SXin Li * x is an OUTPUT array of length n which contains the least-squares
954*5ddc57e5SXin Li * solution of the system A*x = b, D*x = 0.
955*5ddc57e5SXin Li *
956*5ddc57e5SXin Li * Sdiag is an OUTPUT array of length n which contains the diagonal
957*5ddc57e5SXin Li * elements of the upper triangular matrix S.
958*5ddc57e5SXin Li *
959*5ddc57e5SXin Li * W is a work array of length n.
960*5ddc57e5SXin Li *
961*5ddc57e5SXin Li */
962*5ddc57e5SXin Li {
963*5ddc57e5SXin Li int i, kk, j, k, nsing;
964*5ddc57e5SXin Li double qtbpj, sum, temp;
965*5ddc57e5SXin Li double _sin, _cos, _tan, _cot; /* local variables, not functions */
966*5ddc57e5SXin Li
967*5ddc57e5SXin Li /*** Copy R and Q^T*b to preserve input and initialize S.
968*5ddc57e5SXin Li In particular, save the diagonal elements of R in x. ***/
969*5ddc57e5SXin Li
970*5ddc57e5SXin Li for (j = 0; j < n; j++) {
971*5ddc57e5SXin Li for (i = j; i < n; i++)
972*5ddc57e5SXin Li r[j*ldr+i] = r[i*ldr+j];
973*5ddc57e5SXin Li x[j] = r[j*ldr+j];
974*5ddc57e5SXin Li W[j] = qtb[j];
975*5ddc57e5SXin Li }
976*5ddc57e5SXin Li
977*5ddc57e5SXin Li /*** Eliminate the diagonal matrix D using a Givens rotation. ***/
978*5ddc57e5SXin Li
979*5ddc57e5SXin Li for (j = 0; j < n; j++) {
980*5ddc57e5SXin Li
981*5ddc57e5SXin Li /*** Prepare the row of D to be eliminated, locating the diagonal
982*5ddc57e5SXin Li element using P from the QR factorization. ***/
983*5ddc57e5SXin Li
984*5ddc57e5SXin Li if (diag[Pivot[j]] != 0) {
985*5ddc57e5SXin Li for (k = j; k < n; k++)
986*5ddc57e5SXin Li Sdiag[k] = 0;
987*5ddc57e5SXin Li Sdiag[j] = diag[Pivot[j]];
988*5ddc57e5SXin Li
989*5ddc57e5SXin Li /*** The transformations to eliminate the row of D modify only
990*5ddc57e5SXin Li a single element of Q^T*b beyond the first n, which is
991*5ddc57e5SXin Li initially 0. ***/
992*5ddc57e5SXin Li
993*5ddc57e5SXin Li qtbpj = 0;
994*5ddc57e5SXin Li for (k = j; k < n; k++) {
995*5ddc57e5SXin Li
996*5ddc57e5SXin Li /** Determine a Givens rotation which eliminates the
997*5ddc57e5SXin Li appropriate element in the current row of D. **/
998*5ddc57e5SXin Li if (Sdiag[k] == 0)
999*5ddc57e5SXin Li continue;
1000*5ddc57e5SXin Li kk = k + ldr * k;
1001*5ddc57e5SXin Li if (fabs(r[kk]) < fabs(Sdiag[k])) {
1002*5ddc57e5SXin Li _cot = r[kk] / Sdiag[k];
1003*5ddc57e5SXin Li _sin = 1 / hypot(1, _cot);
1004*5ddc57e5SXin Li _cos = _sin * _cot;
1005*5ddc57e5SXin Li } else {
1006*5ddc57e5SXin Li _tan = Sdiag[k] / r[kk];
1007*5ddc57e5SXin Li _cos = 1 / hypot(1, _tan);
1008*5ddc57e5SXin Li _sin = _cos * _tan;
1009*5ddc57e5SXin Li }
1010*5ddc57e5SXin Li
1011*5ddc57e5SXin Li /** Compute the modified diagonal element of R and
1012*5ddc57e5SXin Li the modified element of (Q^T*b,0). **/
1013*5ddc57e5SXin Li r[kk] = _cos * r[kk] + _sin * Sdiag[k];
1014*5ddc57e5SXin Li temp = _cos * W[k] + _sin * qtbpj;
1015*5ddc57e5SXin Li qtbpj = -_sin * W[k] + _cos * qtbpj;
1016*5ddc57e5SXin Li W[k] = temp;
1017*5ddc57e5SXin Li
1018*5ddc57e5SXin Li /** Accumulate the tranformation in the row of S. **/
1019*5ddc57e5SXin Li for (i = k+1; i < n; i++) {
1020*5ddc57e5SXin Li temp = _cos * r[k*ldr+i] + _sin * Sdiag[i];
1021*5ddc57e5SXin Li Sdiag[i] = -_sin * r[k*ldr+i] + _cos * Sdiag[i];
1022*5ddc57e5SXin Li r[k*ldr+i] = temp;
1023*5ddc57e5SXin Li }
1024*5ddc57e5SXin Li }
1025*5ddc57e5SXin Li }
1026*5ddc57e5SXin Li
1027*5ddc57e5SXin Li /** Store the diagonal element of S and restore
1028*5ddc57e5SXin Li the corresponding diagonal element of R. **/
1029*5ddc57e5SXin Li Sdiag[j] = r[j*ldr+j];
1030*5ddc57e5SXin Li r[j*ldr+j] = x[j];
1031*5ddc57e5SXin Li }
1032*5ddc57e5SXin Li
1033*5ddc57e5SXin Li /*** Solve the triangular system for z. If the system is singular, then
1034*5ddc57e5SXin Li obtain a least-squares solution. ***/
1035*5ddc57e5SXin Li
1036*5ddc57e5SXin Li nsing = n;
1037*5ddc57e5SXin Li for (j = 0; j < n; j++) {
1038*5ddc57e5SXin Li if (Sdiag[j] == 0 && nsing == n)
1039*5ddc57e5SXin Li nsing = j;
1040*5ddc57e5SXin Li if (nsing < n)
1041*5ddc57e5SXin Li W[j] = 0;
1042*5ddc57e5SXin Li }
1043*5ddc57e5SXin Li
1044*5ddc57e5SXin Li for (j = nsing-1; j >= 0; j--) {
1045*5ddc57e5SXin Li sum = 0;
1046*5ddc57e5SXin Li for (i = j+1; i < nsing; i++)
1047*5ddc57e5SXin Li sum += r[j*ldr+i] * W[i];
1048*5ddc57e5SXin Li W[j] = (W[j] - sum) / Sdiag[j];
1049*5ddc57e5SXin Li }
1050*5ddc57e5SXin Li
1051*5ddc57e5SXin Li /*** Permute the components of z back to components of x. ***/
1052*5ddc57e5SXin Li
1053*5ddc57e5SXin Li for (j = 0; j < n; j++)
1054*5ddc57e5SXin Li x[Pivot[j]] = W[j];
1055*5ddc57e5SXin Li
1056*5ddc57e5SXin Li } /*** lm_qrsolv. ***/
1057*5ddc57e5SXin Li
1058*5ddc57e5SXin Li /******************************************************************************/
1059*5ddc57e5SXin Li /* lm_enorm (Euclidean norm) */
1060*5ddc57e5SXin Li /******************************************************************************/
1061*5ddc57e5SXin Li
lm_enorm(int n,const double * x)1062*5ddc57e5SXin Li double lm_enorm(int n, const double* x)
1063*5ddc57e5SXin Li /* This function calculates the Euclidean norm of an n-vector x.
1064*5ddc57e5SXin Li *
1065*5ddc57e5SXin Li * The Euclidean norm is computed by accumulating the sum of squares
1066*5ddc57e5SXin Li * in three different sums. The sums of squares for the small and large
1067*5ddc57e5SXin Li * components are scaled so that no overflows occur. Non-destructive
1068*5ddc57e5SXin Li * underflows are permitted. Underflows and overflows do not occur in
1069*5ddc57e5SXin Li * the computation of the unscaled sum of squares for the intermediate
1070*5ddc57e5SXin Li * components. The definitions of small, intermediate and large components
1071*5ddc57e5SXin Li * depend on two constants, LM_SQRT_DWARF and LM_SQRT_GIANT. The main
1072*5ddc57e5SXin Li * restrictions on these constants are that LM_SQRT_DWARF**2 not underflow
1073*5ddc57e5SXin Li * and LM_SQRT_GIANT**2 not overflow.
1074*5ddc57e5SXin Li *
1075*5ddc57e5SXin Li * Parameters:
1076*5ddc57e5SXin Li *
1077*5ddc57e5SXin Li * n is a positive integer INPUT variable.
1078*5ddc57e5SXin Li *
1079*5ddc57e5SXin Li * x is an INPUT array of length n.
1080*5ddc57e5SXin Li */
1081*5ddc57e5SXin Li {
1082*5ddc57e5SXin Li int i;
1083*5ddc57e5SXin Li double agiant, s1, s2, s3, xabs, x1max, x3max;
1084*5ddc57e5SXin Li
1085*5ddc57e5SXin Li s1 = 0;
1086*5ddc57e5SXin Li s2 = 0;
1087*5ddc57e5SXin Li s3 = 0;
1088*5ddc57e5SXin Li x1max = 0;
1089*5ddc57e5SXin Li x3max = 0;
1090*5ddc57e5SXin Li agiant = LM_SQRT_GIANT / n;
1091*5ddc57e5SXin Li
1092*5ddc57e5SXin Li /** Sum squares. **/
1093*5ddc57e5SXin Li for (i = 0; i < n; i++) {
1094*5ddc57e5SXin Li xabs = fabs(x[i]);
1095*5ddc57e5SXin Li if (xabs > LM_SQRT_DWARF) {
1096*5ddc57e5SXin Li if (xabs < agiant) {
1097*5ddc57e5SXin Li s2 += SQR(xabs);
1098*5ddc57e5SXin Li } else if (xabs > x1max) {
1099*5ddc57e5SXin Li s1 = 1 + s1 * SQR(x1max / xabs);
1100*5ddc57e5SXin Li x1max = xabs;
1101*5ddc57e5SXin Li } else {
1102*5ddc57e5SXin Li s1 += SQR(xabs / x1max);
1103*5ddc57e5SXin Li }
1104*5ddc57e5SXin Li } else if (xabs > x3max) {
1105*5ddc57e5SXin Li s3 = 1 + s3 * SQR(x3max / xabs);
1106*5ddc57e5SXin Li x3max = xabs;
1107*5ddc57e5SXin Li } else if (xabs != 0) {
1108*5ddc57e5SXin Li s3 += SQR(xabs / x3max);
1109*5ddc57e5SXin Li }
1110*5ddc57e5SXin Li }
1111*5ddc57e5SXin Li
1112*5ddc57e5SXin Li /** Calculate the norm. **/
1113*5ddc57e5SXin Li if (s1 != 0)
1114*5ddc57e5SXin Li return x1max * sqrt(s1 + (s2 / x1max) / x1max);
1115*5ddc57e5SXin Li else if (s2 != 0)
1116*5ddc57e5SXin Li if (s2 >= x3max)
1117*5ddc57e5SXin Li return sqrt(s2 * (1 + (x3max / s2) * (x3max * s3)));
1118*5ddc57e5SXin Li else
1119*5ddc57e5SXin Li return sqrt(x3max * ((s2 / x3max) + (x3max * s3)));
1120*5ddc57e5SXin Li else
1121*5ddc57e5SXin Li return x3max * sqrt(s3);
1122*5ddc57e5SXin Li
1123*5ddc57e5SXin Li } /*** lm_enorm. ***/
1124