http://git-wip-us.apache.org/repos/asf/impala/blob/0936e329/be/src/thirdparty/mpfit/mpfit.c ---------------------------------------------------------------------- diff --git a/be/src/thirdparty/mpfit/mpfit.c b/be/src/thirdparty/mpfit/mpfit.c new file mode 100644 index 0000000..1ab9abb --- /dev/null +++ b/be/src/thirdparty/mpfit/mpfit.c @@ -0,0 +1,2305 @@ +/* + * MINPACK-1 Least Squares Fitting Library + * + * Original public domain version by B. Garbow, K. Hillstrom, J. More' + * (Argonne National Laboratory, MINPACK project, March 1980) + * See the file DISCLAIMER for copyright information. + * + * Tranlation to C Language by S. Moshier (moshier.net) + * + * Enhancements and packaging by C. Markwardt + * (comparable to IDL fitting routine MPFIT + * see http://cow.physics.wisc.edu/~craigm/idl/idl.html) + */ + +/* Main mpfit library routines (double precision) + $Id: mpfit.c,v 1.24 2013/04/23 18:37:38 craigm Exp $ + */ + +#include <stdio.h> +#include <stdlib.h> +#include <math.h> +#include <string.h> +#include "mpfit.h" + +/* Forward declarations of functions in this module */ +static int mp_fdjac2(mp_func funct, + int m, int n, int *ifree, int npar, double *x, double *fvec, + double *fjac, int ldfjac, double epsfcn, + double *wa, void *priv, int *nfev, + double *step, double *dstep, int *dside, + int *qulimited, double *ulimit, + int *ddebug, double *ddrtol, double *ddatol, + double *wa2, double **dvecptr); +static void mp_qrfac(int m, int n, double *a, int lda, + int pivot, int *ipvt, int lipvt, + double *rdiag, double *acnorm, double *wa); +static void mp_qrsolv(int n, double *r, int ldr, int *ipvt, double *diag, + double *qtb, double *x, double *sdiag, double *wa); +static void mp_lmpar(int n, double *r, int ldr, int *ipvt, int *ifree, double *diag, + double *qtb, double delta, double *par, double *x, + double *sdiag, double *wa1, double *wa2); +static double mp_enorm(int n, double *x); +static double mp_dmax1(double a, double b); +static double mp_dmin1(double a, double b); +static int mp_min0(int a, int b); +static int mp_covar(int n, double *r, int ldr, int *ipvt, double tol, double *wa); + +/* Macro to call user function */ +#define mp_call(funct, m, n, x, fvec, dvec, priv) (*(funct))(m,n,x,fvec,dvec,priv) + +/* Macro to safely allocate memory */ +#define mp_malloc(dest,type,size) \ + dest = (type *) malloc( sizeof(type)*size ); \ + if (dest == 0) { \ + info = MP_ERR_MEMORY; \ + goto CLEANUP; \ + } else { \ + int _k; \ + for (_k=0; _k<(size); _k++) dest[_k] = 0; \ + } + +/* +* ********** +* +* subroutine mpfit +* +* the purpose of mpfit is to minimize the sum of the squares of +* m nonlinear functions in n variables by a modification of +* the levenberg-marquardt algorithm. the user must provide a +* subroutine which calculates the functions. the jacobian is +* then calculated by a finite-difference approximation. +* +* mp_funct funct - function to be minimized +* int m - number of data points +* int npar - number of fit parameters +* double *xall - array of n initial parameter values +* upon return, contains adjusted parameter values +* mp_par *pars - array of npar structures specifying constraints; +* or 0 (null pointer) for unconstrained fitting +* [ see README and mpfit.h for definition & use of mp_par] +* mp_config *config - pointer to structure which specifies the +* configuration of mpfit(); or 0 (null pointer) +* if the default configuration is to be used. +* See README and mpfit.h for definition and use +* of config. +* void *private - any private user data which is to be passed directly +* to funct without modification by mpfit(). +* mp_result *result - pointer to structure, which upon return, contains +* the results of the fit. The user should zero this +* structure. If any of the array values are to be +* returned, the user should allocate storage for them +* and assign the corresponding pointer in *result. +* Upon return, *result will be updated, and +* any of the non-null arrays will be filled. +* +* +* FORTRAN DOCUMENTATION BELOW +* +* +* the subroutine statement is +* +* subroutine lmdif(fcn,m,n,x,fvec,ftol,xtol,gtol,maxfev,epsfcn, +* diag,mode,factor,nprint,info,nfev,fjac, +* ldfjac,ipvt,qtf,wa1,wa2,wa3,wa4) +* +* where +* +* fcn is the name of the user-supplied subroutine which +* calculates the functions. fcn must be declared +* in an external statement in the user calling +* program, and should be written as follows. +* +* subroutine fcn(m,n,x,fvec,iflag) +* integer m,n,iflag +* double precision x(n),fvec(m) +* ---------- +* calculate the functions at x and +* return this vector in fvec. +* ---------- +* return +* end +* +* the value of iflag should not be changed by fcn unless +* the user wants to terminate execution of lmdif. +* in this case set iflag to a negative integer. +* +* m is a positive integer input variable set to the number +* of functions. +* +* n is a positive integer input variable set to the number +* of variables. n must not exceed m. +* +* x is an array of length n. on input x must contain +* an initial estimate of the solution vector. on output x +* contains the final estimate of the solution vector. +* +* fvec is an output array of length m which contains +* the functions evaluated at the output x. +* +* ftol is a nonnegative input variable. termination +* occurs when both the actual and predicted relative +* reductions in the sum of squares are at most ftol. +* therefore, ftol measures the relative error desired +* in the sum of squares. +* +* xtol is a nonnegative input variable. termination +* occurs when the relative error between two consecutive +* iterates is at most xtol. therefore, xtol measures the +* relative error desired in the approximate solution. +* +* gtol is a nonnegative input variable. termination +* occurs when the cosine of the angle between fvec and +* any column of the jacobian is at most gtol in absolute +* value. therefore, gtol measures the orthogonality +* desired between the function vector and the columns +* of the jacobian. +* +* maxfev is a positive integer input variable. termination +* occurs when the number of calls to fcn is at least +* maxfev by the end of an iteration. +* +* epsfcn is an input variable used in determining a suitable +* step length for the forward-difference approximation. this +* approximation assumes that the relative errors in the +* functions are of the order of epsfcn. if epsfcn is less +* than the machine precision, it is assumed that the relative +* errors in the functions are of the order of the machine +* precision. +* +* diag is an array of length n. if mode = 1 (see +* below), diag is internally set. if mode = 2, diag +* must contain positive entries that serve as +* multiplicative scale factors for the variables. +* +* mode is an integer input variable. if mode = 1, the +* variables will be scaled internally. if mode = 2, +* the scaling is specified by the input diag. other +* values of mode are equivalent to mode = 1. +* +* factor is a positive input variable used in determining the +* initial step bound. this bound is set to the product of +* factor and the euclidean norm of diag*x if nonzero, or else +* to factor itself. in most cases factor should lie in the +* interval (.1,100.). 100. is a generally recommended value. +* +* nprint is an integer input variable that enables controlled +* printing of iterates if it is positive. in this case, +* fcn is called with iflag = 0 at the beginning of the first +* iteration and every nprint iterations thereafter and +* immediately prior to return, with x and fvec available +* for printing. if nprint is not positive, no special calls +* of fcn with iflag = 0 are made. +* +* info is an integer output variable. if the user has +* terminated execution, info is set to the (negative) +* value of iflag. see description of fcn. otherwise, +* info is set as follows. +* +* info = 0 improper input parameters. +* +* info = 1 both actual and predicted relative reductions +* in the sum of squares are at most ftol. +* +* info = 2 relative error between two consecutive iterates +* is at most xtol. +* +* info = 3 conditions for info = 1 and info = 2 both hold. +* +* info = 4 the cosine of the angle between fvec and any +* column of the jacobian is at most gtol in +* absolute value. +* +* info = 5 number of calls to fcn has reached or +* exceeded maxfev. +* +* info = 6 ftol is too small. no further reduction in +* the sum of squares is possible. +* +* info = 7 xtol is too small. no further improvement in +* the approximate solution x is possible. +* +* info = 8 gtol is too small. fvec is orthogonal to the +* columns of the jacobian to machine precision. +* +* nfev is an integer output variable set to the number of +* calls to fcn. +* +* fjac is an output m by n array. the upper n by n submatrix +* of fjac contains an upper triangular matrix r with +* diagonal elements of nonincreasing magnitude such that +* +* t t t +* p *(jac *jac)*p = r *r, +* +* where p is a permutation matrix and jac is the final +* calculated jacobian. column j of p is column ipvt(j) +* (see below) of the identity matrix. the lower trapezoidal +* part of fjac contains information generated during +* the computation of r. +* +* ldfjac is a positive integer input variable not less than m +* which specifies the leading dimension of the array fjac. +* +* ipvt is an integer output array of length n. ipvt +* defines a permutation matrix p such that jac*p = q*r, +* where jac is the final calculated jacobian, q is +* orthogonal (not stored), and r is upper triangular +* with diagonal elements of nonincreasing magnitude. +* column j of p is column ipvt(j) of the identity matrix. +* +* qtf is an output array of length n which contains +* the first n elements of the vector (q transpose)*fvec. +* +* wa1, wa2, and wa3 are work arrays of length n. +* +* wa4 is a work array of length m. +* +* subprograms called +* +* user-supplied ...... fcn +* +* minpack-supplied ... dpmpar,enorm,fdjac2,lmpar,qrfac +* +* fortran-supplied ... dabs,dmax1,dmin1,dsqrt,mod +* +* argonne national laboratory. minpack project. march 1980. +* burton s. garbow, kenneth e. hillstrom, jorge j. more +* +* ********** */ + + +int mpfit(mp_func funct, int m, int npar, + double *xall, mp_par *pars, mp_config *config, void *private_data, + mp_result *result) +{ + mp_config conf; + int i, j, info, iflag, nfree, npegged, iter; + int qanylim = 0; + + int ij,jj,l; + double actred,delta,dirder,fnorm,fnorm1,gnorm, orignorm; + double par,pnorm,prered,ratio; + double sum,temp,temp1,temp2,temp3,xnorm, alpha; + static double one = 1.0; + static double p1 = 0.1; + static double p5 = 0.5; + static double p25 = 0.25; + static double p75 = 0.75; + static double p0001 = 1.0e-4; + static double zero = 0.0; + int nfev = 0; + + double *step = 0, *dstep = 0, *llim = 0, *ulim = 0; + int *pfixed = 0, *mpside = 0, *ifree = 0, *qllim = 0, *qulim = 0; + int *ddebug = 0; + double *ddrtol = 0, *ddatol = 0; + + double *fvec = 0, *qtf = 0; + double *x = 0, *xnew = 0, *fjac = 0, *diag = 0; + double *wa1 = 0, *wa2 = 0, *wa3 = 0, *wa4 = 0; + double **dvecptr = 0; + int *ipvt = 0; + + int ldfjac; + + /* Default configuration */ + conf.ftol = 1e-10; + conf.xtol = 1e-10; + conf.gtol = 1e-10; + conf.stepfactor = 100.0; + conf.nprint = 1; + conf.epsfcn = MP_MACHEP0; + conf.maxiter = 200; + conf.douserscale = 0; + conf.maxfev = 0; + conf.covtol = 1e-14; + conf.nofinitecheck = 0; + + if (config) { + /* Transfer any user-specified configurations */ + if (config->ftol > 0) conf.ftol = config->ftol; + if (config->xtol > 0) conf.xtol = config->xtol; + if (config->gtol > 0) conf.gtol = config->gtol; + if (config->stepfactor > 0) conf.stepfactor = config->stepfactor; + if (config->nprint >= 0) conf.nprint = config->nprint; + if (config->epsfcn > 0) conf.epsfcn = config->epsfcn; + if (config->maxiter > 0) conf.maxiter = config->maxiter; + if (config->maxiter == MP_NO_ITER) conf.maxiter = 0; + if (config->douserscale != 0) conf.douserscale = config->douserscale; + if (config->covtol > 0) conf.covtol = config->covtol; + if (config->nofinitecheck > 0) conf.nofinitecheck = config->nofinitecheck; + conf.maxfev = config->maxfev; + } + + info = MP_ERR_INPUT; /* = 0 */ + iflag = 0; + nfree = 0; + npegged = 0; + + /* Basic error checking */ + if (funct == 0) { + return MP_ERR_FUNC; + } + + if ((m <= 0) || (xall == 0)) { + return MP_ERR_NPOINTS; + } + + if (npar <= 0) { + return MP_ERR_NFREE; + } + + fnorm = -1.0; + fnorm1 = -1.0; + xnorm = -1.0; + delta = 0.0; + + /* FIXED parameters? */ + mp_malloc(pfixed, int, npar); + if (pars) for (i=0; i<npar; i++) { + pfixed[i] = (pars[i].fixed)?1:0; + } + + /* Finite differencing step, absolute and relative, and sidedness of deriv */ + mp_malloc(step, double, npar); + mp_malloc(dstep, double, npar); + mp_malloc(mpside, int, npar); + mp_malloc(ddebug, int, npar); + mp_malloc(ddrtol, double, npar); + mp_malloc(ddatol, double, npar); + if (pars) for (i=0; i<npar; i++) { + step[i] = pars[i].step; + dstep[i] = pars[i].relstep; + mpside[i] = pars[i].side; + ddebug[i] = pars[i].deriv_debug; + ddrtol[i] = pars[i].deriv_reltol; + ddatol[i] = pars[i].deriv_abstol; + } + + /* Finish up the free parameters */ + nfree = 0; + mp_malloc(ifree, int, npar); + for (i=0, j=0; i<npar; i++) { + if (pfixed[i] == 0) { + nfree++; + ifree[j++] = i; + } + } + if (nfree == 0) { + info = MP_ERR_NFREE; + goto CLEANUP; + } + + if (pars) { + for (i=0; i<npar; i++) { + if ( (pars[i].limited[0] && (xall[i] < pars[i].limits[0])) || + (pars[i].limited[1] && (xall[i] > pars[i].limits[1])) ) { + info = MP_ERR_INITBOUNDS; + goto CLEANUP; + } + if ( (pars[i].fixed == 0) && pars[i].limited[0] && pars[i].limited[1] && + (pars[i].limits[0] >= pars[i].limits[1])) { + info = MP_ERR_BOUNDS; + goto CLEANUP; + } + } + + mp_malloc(qulim, int, nfree); + mp_malloc(qllim, int, nfree); + mp_malloc(ulim, double, nfree); + mp_malloc(llim, double, nfree); + + for (i=0; i<nfree; i++) { + qllim[i] = pars[ifree[i]].limited[0]; + qulim[i] = pars[ifree[i]].limited[1]; + llim[i] = pars[ifree[i]].limits[0]; + ulim[i] = pars[ifree[i]].limits[1]; + if (qllim[i] || qulim[i]) qanylim = 1; + } + } + + /* Sanity checking on input configuration */ + if ((npar <= 0) || (conf.ftol <= 0) || (conf.xtol <= 0) || + (conf.gtol <= 0) || (conf.maxiter < 0) || + (conf.stepfactor <= 0)) { + info = MP_ERR_PARAM; + goto CLEANUP; + } + + /* Ensure there are some degrees of freedom */ + if (m < nfree) { + info = MP_ERR_DOF; + goto CLEANUP; + } + + /* Allocate temporary storage */ + mp_malloc(fvec, double, m); + mp_malloc(qtf, double, nfree); + mp_malloc(x, double, nfree); + mp_malloc(xnew, double, npar); + mp_malloc(fjac, double, m*nfree); + ldfjac = m; + mp_malloc(diag, double, npar); + mp_malloc(wa1, double, npar); + mp_malloc(wa2, double, npar); + mp_malloc(wa3, double, npar); + mp_malloc(wa4, double, m); + mp_malloc(ipvt, int, npar); + mp_malloc(dvecptr, double *, npar); + + /* Evaluate user function with initial parameter values */ + iflag = mp_call(funct, m, npar, xall, fvec, 0, private_data); + nfev += 1; + if (iflag < 0) { + goto CLEANUP; + } + + fnorm = mp_enorm(m, fvec); + orignorm = fnorm*fnorm; + + /* Make a new copy */ + for (i=0; i<npar; i++) { + xnew[i] = xall[i]; + } + + /* Transfer free parameters to 'x' */ + for (i=0; i<nfree; i++) { + x[i] = xall[ifree[i]]; + } + + /* Initialize Levelberg-Marquardt parameter and iteration counter */ + + par = 0.0; + iter = 1; + for (i=0; i<nfree; i++) { + qtf[i] = 0; + } + + /* Beginning of the outer loop */ + OUTER_LOOP: + for (i=0; i<nfree; i++) { + xnew[ifree[i]] = x[i]; + } + + /* XXX call iterproc */ + + /* Calculate the jacobian matrix */ + iflag = mp_fdjac2(funct, m, nfree, ifree, npar, xnew, fvec, fjac, ldfjac, + conf.epsfcn, wa4, private_data, &nfev, + step, dstep, mpside, qulim, ulim, + ddebug, ddrtol, ddatol, wa2, dvecptr); + if (iflag < 0) { + goto CLEANUP; + } + + /* Determine if any of the parameters are pegged at the limits */ + if (qanylim) { + for (j=0; j<nfree; j++) { + int lpegged = (qllim[j] && (x[j] == llim[j])); + int upegged = (qulim[j] && (x[j] == ulim[j])); + sum = 0; + + /* If the parameter is pegged at a limit, compute the gradient + direction */ + if (lpegged || upegged) { + ij = j*ldfjac; + for (i=0; i<m; i++, ij++) { + sum += fvec[i] * fjac[ij]; + } + } + /* If pegged at lower limit and gradient is toward negative then + reset gradient to zero */ + if (lpegged && (sum > 0)) { + ij = j*ldfjac; + for (i=0; i<m; i++, ij++) fjac[ij] = 0; + } + /* If pegged at upper limit and gradient is toward positive then + reset gradient to zero */ + if (upegged && (sum < 0)) { + ij = j*ldfjac; + for (i=0; i<m; i++, ij++) fjac[ij] = 0; + } + } + } + + /* Compute the QR factorization of the jacobian */ + mp_qrfac(m,nfree,fjac,ldfjac,1,ipvt,nfree,wa1,wa2,wa3); + + /* + * on the first iteration and if mode is 1, scale according + * to the norms of the columns of the initial jacobian. + */ + if (iter == 1) { + if (conf.douserscale == 0) { + for (j=0; j<nfree; j++) { + diag[ifree[j]] = wa2[j]; + if (wa2[j] == zero ) { + diag[ifree[j]] = one; + } + } + } + + /* + * on the first iteration, calculate the norm of the scaled x + * and initialize the step bound delta. + */ + for (j=0; j<nfree; j++ ) { + wa3[j] = diag[ifree[j]] * x[j]; + } + + xnorm = mp_enorm(nfree, wa3); + delta = conf.stepfactor*xnorm; + if (delta == zero) delta = conf.stepfactor; + } + + /* + * form (q transpose)*fvec and store the first n components in + * qtf. + */ + for (i=0; i<m; i++ ) { + wa4[i] = fvec[i]; + } + + jj = 0; + for (j=0; j<nfree; j++ ) { + temp3 = fjac[jj]; + if (temp3 != zero) { + sum = zero; + ij = jj; + for (i=j; i<m; i++ ) { + sum += fjac[ij] * wa4[i]; + ij += 1; /* fjac[i+m*j] */ + } + temp = -sum / temp3; + ij = jj; + for (i=j; i<m; i++ ) { + wa4[i] += fjac[ij] * temp; + ij += 1; /* fjac[i+m*j] */ + } + } + fjac[jj] = wa1[j]; + jj += m+1; /* fjac[j+m*j] */ + qtf[j] = wa4[j]; + } + + /* ( From this point on, only the square matrix, consisting of the + triangle of R, is needed.) */ + + + if (conf.nofinitecheck) { + /* Check for overflow. This should be a cheap test here since FJAC + has been reduced to a (small) square matrix, and the test is + O(N^2). */ + int off = 0, nonfinite = 0; + + for (j=0; j<nfree; j++) { + for (i=0; i<nfree; i++) { + if (mpfinite(fjac[off+i]) == 0) nonfinite = 1; + } + off += ldfjac; + } + + if (nonfinite) { + info = MP_ERR_NAN; + goto CLEANUP; + } + } + + + /* + * compute the norm of the scaled gradient. + */ + gnorm = zero; + if (fnorm != zero) { + jj = 0; + for (j=0; j<nfree; j++ ) { + l = ipvt[j]; + if (wa2[l] != zero) { + sum = zero; + ij = jj; + for (i=0; i<=j; i++ ) { + sum += fjac[ij]*(qtf[i]/fnorm); + ij += 1; /* fjac[i+m*j] */ + } + gnorm = mp_dmax1(gnorm,fabs(sum/wa2[l])); + } + jj += m; + } + } + + /* + * test for convergence of the gradient norm. + */ + if (gnorm <= conf.gtol) info = MP_OK_DIR; + if (info != 0) goto L300; + if (conf.maxiter == 0) { + info = MP_MAXITER; + goto L300; + } + + /* + * rescale if necessary. + */ + if (conf.douserscale == 0) { + for (j=0; j<nfree; j++ ) { + diag[ifree[j]] = mp_dmax1(diag[ifree[j]],wa2[j]); + } + } + + /* + * beginning of the inner loop. + */ + L200: + /* + * determine the levenberg-marquardt parameter. + */ + mp_lmpar(nfree,fjac,ldfjac,ipvt,ifree,diag,qtf,delta,&par,wa1,wa2,wa3,wa4); + /* + * store the direction p and x + p. calculate the norm of p. + */ + for (j=0; j<nfree; j++ ) { + wa1[j] = -wa1[j]; + } + + alpha = 1.0; + if (qanylim == 0) { + /* No parameter limits, so just move to new position WA2 */ + for (j=0; j<nfree; j++ ) { + wa2[j] = x[j] + wa1[j]; + } + + } else { + /* Respect the limits. If a step were to go out of bounds, then + * we should take a step in the same direction but shorter distance. + * The step should take us right to the limit in that case. + */ + for (j=0; j<nfree; j++) { + int lpegged = (qllim[j] && (x[j] <= llim[j])); + int upegged = (qulim[j] && (x[j] >= ulim[j])); + int dwa1 = fabs(wa1[j]) > MP_MACHEP0; + + if (lpegged && (wa1[j] < 0)) wa1[j] = 0; + if (upegged && (wa1[j] > 0)) wa1[j] = 0; + + if (dwa1 && qllim[j] && ((x[j] + wa1[j]) < llim[j])) { + alpha = mp_dmin1(alpha, (llim[j]-x[j])/wa1[j]); + } + if (dwa1 && qulim[j] && ((x[j] + wa1[j]) > ulim[j])) { + alpha = mp_dmin1(alpha, (ulim[j]-x[j])/wa1[j]); + } + } + + /* Scale the resulting vector, advance to the next position */ + for (j=0; j<nfree; j++) { + double sgnu, sgnl; + double ulim1, llim1; + + wa1[j] = wa1[j] * alpha; + wa2[j] = x[j] + wa1[j]; + + /* Adjust the output values. If the step put us exactly + * on a boundary, make sure it is exact. + */ + sgnu = (ulim[j] >= 0) ? (+1) : (-1); + sgnl = (llim[j] >= 0) ? (+1) : (-1); + ulim1 = ulim[j]*(1-sgnu*MP_MACHEP0) - ((ulim[j] == 0)?(MP_MACHEP0):0); + llim1 = llim[j]*(1+sgnl*MP_MACHEP0) + ((llim[j] == 0)?(MP_MACHEP0):0); + + if (qulim[j] && (wa2[j] >= ulim1)) { + wa2[j] = ulim[j]; + } + if (qllim[j] && (wa2[j] <= llim1)) { + wa2[j] = llim[j]; + } + } + + } + + for (j=0; j<nfree; j++ ) { + wa3[j] = diag[ifree[j]]*wa1[j]; + } + + pnorm = mp_enorm(nfree,wa3); + + /* + * on the first iteration, adjust the initial step bound. + */ + if (iter == 1) { + delta = mp_dmin1(delta,pnorm); + } + + /* + * evaluate the function at x + p and calculate its norm. + */ + for (i=0; i<nfree; i++) { + xnew[ifree[i]] = wa2[i]; + } + + iflag = mp_call(funct, m, npar, xnew, wa4, 0, private_data); + nfev += 1; + if (iflag < 0) goto L300; + + fnorm1 = mp_enorm(m,wa4); + + /* + * compute the scaled actual reduction. + */ + actred = -one; + if ((p1*fnorm1) < fnorm) { + temp = fnorm1/fnorm; + actred = one - temp * temp; + } + + /* + * compute the scaled predicted reduction and + * the scaled directional derivative. + */ + jj = 0; + for (j=0; j<nfree; j++ ) { + wa3[j] = zero; + l = ipvt[j]; + temp = wa1[l]; + ij = jj; + for (i=0; i<=j; i++ ) { + wa3[i] += fjac[ij]*temp; + ij += 1; /* fjac[i+m*j] */ + } + jj += m; + } + + /* Remember, alpha is the fraction of the full LM step actually + * taken + */ + + temp1 = mp_enorm(nfree,wa3)*alpha/fnorm; + temp2 = (sqrt(alpha*par)*pnorm)/fnorm; + prered = temp1*temp1 + (temp2*temp2)/p5; + dirder = -(temp1*temp1 + temp2*temp2); + + /* + * compute the ratio of the actual to the predicted + * reduction. + */ + ratio = zero; + if (prered != zero) { + ratio = actred/prered; + } + + /* + * update the step bound. + */ + + if (ratio <= p25) { + if (actred >= zero) { + temp = p5; + } else { + temp = p5*dirder/(dirder + p5*actred); + } + if (((p1*fnorm1) >= fnorm) + || (temp < p1) ) { + temp = p1; + } + delta = temp*mp_dmin1(delta,pnorm/p1); + par = par/temp; + } else { + if ((par == zero) || (ratio >= p75) ) { + delta = pnorm/p5; + par = p5*par; + } + } + + /* + * test for successful iteration. + */ + if (ratio >= p0001) { + + /* + * successful iteration. update x, fvec, and their norms. + */ + for (j=0; j<nfree; j++ ) { + x[j] = wa2[j]; + wa2[j] = diag[ifree[j]]*x[j]; + } + for (i=0; i<m; i++ ) { + fvec[i] = wa4[i]; + } + xnorm = mp_enorm(nfree,wa2); + fnorm = fnorm1; + iter += 1; + } + + /* + * tests for convergence. + */ + if ((fabs(actred) <= conf.ftol) && (prered <= conf.ftol) && + (p5*ratio <= one) ) { + info = MP_OK_CHI; + } + if (delta <= conf.xtol*xnorm) { + info = MP_OK_PAR; + } + if ((fabs(actred) <= conf.ftol) && (prered <= conf.ftol) && (p5*ratio <= one) + && ( info == 2) ) { + info = MP_OK_BOTH; + } + if (info != 0) { + goto L300; + } + + /* + * tests for termination and stringent tolerances. + */ + if ((conf.maxfev > 0) && (nfev >= conf.maxfev)) { + /* Too many function evaluations */ + info = MP_MAXITER; + } + if (iter >= conf.maxiter) { + /* Too many iterations */ + info = MP_MAXITER; + } + if ((fabs(actred) <= MP_MACHEP0) && (prered <= MP_MACHEP0) && (p5*ratio <= one) ) { + info = MP_FTOL; + } + if (delta <= MP_MACHEP0*xnorm) { + info = MP_XTOL; + } + if (gnorm <= MP_MACHEP0) { + info = MP_GTOL; + } + if (info != 0) { + goto L300; + } + + /* + * end of the inner loop. repeat if iteration unsuccessful. + */ + if (ratio < p0001) goto L200; + /* + * end of the outer loop. + */ + goto OUTER_LOOP; + + L300: + /* + * termination, either normal or user imposed. + */ + if (iflag < 0) { + info = iflag; + } + iflag = 0; + + for (i=0; i<nfree; i++) { + xall[ifree[i]] = x[i]; + } + + if ((conf.nprint > 0) && (info > 0)) { + iflag = mp_call(funct, m, npar, xall, fvec, 0, private_data); + nfev += 1; + } + + /* Compute number of pegged parameters */ + npegged = 0; + if (pars) for (i=0; i<npar; i++) { + if ((pars[i].limited[0] && (pars[i].limits[0] == xall[i])) || + (pars[i].limited[1] && (pars[i].limits[1] == xall[i]))) { + npegged ++; + } + } + + /* Compute and return the covariance matrix and/or parameter errors */ + if (result && (result->covar || result->xerror)) { + mp_covar(nfree, fjac, ldfjac, ipvt, conf.covtol, wa2); + + if (result->covar) { + /* Zero the destination covariance array */ + for (j=0; j<(npar*npar); j++) result->covar[j] = 0; + + /* Transfer the covariance array */ + for (j=0; j<nfree; j++) { + for (i=0; i<nfree; i++) { + result->covar[ifree[j]*npar+ifree[i]] = fjac[j*ldfjac+i]; + } + } + } + + if (result->xerror) { + for (j=0; j<npar; j++) result->xerror[j] = 0; + + for (j=0; j<nfree; j++) { + double cc = fjac[j*ldfjac+j]; + if (cc > 0) result->xerror[ifree[j]] = sqrt(cc); + } + } + } + + if (result) { + strcpy(result->version, MPFIT_VERSION); + result->bestnorm = mp_dmax1(fnorm,fnorm1); + result->bestnorm *= result->bestnorm; + result->orignorm = orignorm; + result->status = info; + result->niter = iter; + result->nfev = nfev; + result->npar = npar; + result->nfree = nfree; + result->npegged = npegged; + result->nfunc = m; + + /* Copy residuals if requested */ + if (result->resid) { + for (j=0; j<m; j++) result->resid[j] = fvec[j]; + } + } + + + CLEANUP: + if (fvec) free(fvec); + if (qtf) free(qtf); + if (x) free(x); + if (xnew) free(xnew); + if (fjac) free(fjac); + if (diag) free(diag); + if (wa1) free(wa1); + if (wa2) free(wa2); + if (wa3) free(wa3); + if (wa4) free(wa4); + if (ipvt) free(ipvt); + if (pfixed) free(pfixed); + if (step) free(step); + if (dstep) free(dstep); + if (mpside) free(mpside); + if (ddebug) free(ddebug); + if (ddrtol) free(ddrtol); + if (ddatol) free(ddatol); + if (ifree) free(ifree); + if (qllim) free(qllim); + if (qulim) free(qulim); + if (llim) free(llim); + if (ulim) free(ulim); + if (dvecptr) free(dvecptr); + + return info; +} + + +/************************fdjac2.c*************************/ + +static +int mp_fdjac2(mp_func funct, + int m, int n, int *ifree, int npar, double *x, double *fvec, + double *fjac, int ldfjac, double epsfcn, + double *wa, void *priv, int *nfev, + double *step, double *dstep, int *dside, + int *qulimited, double *ulimit, + int *ddebug, double *ddrtol, double *ddatol, + double *wa2, double **dvec) +{ +/* +* ********** +* +* subroutine fdjac2 +* +* this subroutine computes a forward-difference approximation +* to the m by n jacobian matrix associated with a specified +* problem of m functions in n variables. +* +* the subroutine statement is +* +* subroutine fdjac2(fcn,m,n,x,fvec,fjac,ldfjac,iflag,epsfcn,wa) +* +* where +* +* fcn is the name of the user-supplied subroutine which +* calculates the functions. fcn must be declared +* in an external statement in the user calling +* program, and should be written as follows. +* +* subroutine fcn(m,n,x,fvec,iflag) +* integer m,n,iflag +* double precision x(n),fvec(m) +* ---------- +* calculate the functions at x and +* return this vector in fvec. +* ---------- +* return +* end +* +* the value of iflag should not be changed by fcn unless +* the user wants to terminate execution of fdjac2. +* in this case set iflag to a negative integer. +* +* m is a positive integer input variable set to the number +* of functions. +* +* n is a positive integer input variable set to the number +* of variables. n must not exceed m. +* +* x is an input array of length n. +* +* fvec is an input array of length m which must contain the +* functions evaluated at x. +* +* fjac is an output m by n array which contains the +* approximation to the jacobian matrix evaluated at x. +* +* ldfjac is a positive integer input variable not less than m +* which specifies the leading dimension of the array fjac. +* +* iflag is an integer variable which can be used to terminate +* the execution of fdjac2. see description of fcn. +* +* epsfcn is an input variable used in determining a suitable +* step length for the forward-difference approximation. this +* approximation assumes that the relative errors in the +* functions are of the order of epsfcn. if epsfcn is less +* than the machine precision, it is assumed that the relative +* errors in the functions are of the order of the machine +* precision. +* +* wa is a work array of length m. +* +* subprograms called +* +* user-supplied ...... fcn +* +* minpack-supplied ... dpmpar +* +* fortran-supplied ... dabs,dmax1,dsqrt +* +* argonne national laboratory. minpack project. march 1980. +* burton s. garbow, kenneth e. hillstrom, jorge j. more +* + ********** +*/ + int i,j,ij; + int iflag = 0; + double eps,h,temp; + static double zero = 0.0; + int has_analytical_deriv = 0, has_numerical_deriv = 0; + int has_debug_deriv = 0; + + temp = mp_dmax1(epsfcn,MP_MACHEP0); + eps = sqrt(temp); + ij = 0; + ldfjac = 0; /* Prevent compiler warning */ + if (ldfjac){} /* Prevent compiler warning */ + + for (j=0; j<npar; j++) dvec[j] = 0; + + /* Initialize the Jacobian derivative matrix */ + for (j=0; j<(n*m); j++) fjac[j] = 0; + + /* Check for which parameters need analytical derivatives and which + need numerical ones */ + for (j=0; j<n; j++) { /* Loop through free parameters only */ + if (dside && dside[ifree[j]] == 3 && ddebug[ifree[j]] == 0) { + /* Purely analytical derivatives */ + dvec[ifree[j]] = fjac + j*m; + has_analytical_deriv = 1; + } else if (dside && ddebug[ifree[j]] == 1) { + /* Numerical and analytical derivatives as a debug cross-check */ + dvec[ifree[j]] = fjac + j*m; + has_analytical_deriv = 1; + has_numerical_deriv = 1; + has_debug_deriv = 1; + } else { + has_numerical_deriv = 1; + } + } + + /* If there are any parameters requiring analytical derivatives, + then compute them first. */ + if (has_analytical_deriv) { + iflag = mp_call(funct, m, npar, x, wa, dvec, priv); + if (nfev) *nfev = *nfev + 1; + if (iflag < 0 ) goto DONE; + } + + if (has_debug_deriv) { + printf("FJAC DEBUG BEGIN\n"); + printf("# %10s %10s %10s %10s %10s %10s\n", + "IPNT", "FUNC", "DERIV_U", "DERIV_N", "DIFF_ABS", "DIFF_REL"); + } + + /* Any parameters requiring numerical derivatives */ + if (has_numerical_deriv) for (j=0; j<n; j++) { /* Loop thru free parms */ + int dsidei = (dside)?(dside[ifree[j]]):(0); + int debug = ddebug[ifree[j]]; + double dr = ddrtol[ifree[j]], da = ddatol[ifree[j]]; + + /* Check for debugging */ + if (debug) { + printf("FJAC PARM %d\n", ifree[j]); + } + + /* Skip parameters already done by user-computed partials */ + if (dside && dsidei == 3) continue; + + temp = x[ifree[j]]; + h = eps * fabs(temp); + if (step && step[ifree[j]] > 0) h = step[ifree[j]]; + if (dstep && dstep[ifree[j]] > 0) h = fabs(dstep[ifree[j]]*temp); + if (h == zero) h = eps; + + /* If negative step requested, or we are against the upper limit */ + if ((dside && dsidei == -1) || + (dside && dsidei == 0 && + qulimited && ulimit && qulimited[j] && + (temp > (ulimit[j]-h)))) { + h = -h; + } + + x[ifree[j]] = temp + h; + iflag = mp_call(funct, m, npar, x, wa, 0, priv); + if (nfev) *nfev = *nfev + 1; + if (iflag < 0 ) goto DONE; + x[ifree[j]] = temp; + + if (dsidei <= 1) { + /* COMPUTE THE ONE-SIDED DERIVATIVE */ + if (! debug) { + /* Non-debug path for speed */ + for (i=0; i<m; i++, ij++) { + fjac[ij] = (wa[i] - fvec[i])/h; /* fjac[i+m*j] */ + } + } else { + /* Debug path for correctness */ + for (i=0; i<m; i++, ij++) { + double fjold = fjac[ij]; + fjac[ij] = (wa[i] - fvec[i])/h; /* fjac[i+m*j] */ + if ((da == 0 && dr == 0 && (fjold != 0 || fjac[ij] != 0)) || + ((da != 0 || dr != 0) && (fabs(fjold-fjac[ij]) > da + fabs(fjold)*dr))) { + printf(" %10d %10.4g %10.4g %10.4g %10.4g %10.4g\n", + i, fvec[i], fjold, fjac[ij], fjold-fjac[ij], + (fjold == 0)?(0):((fjold-fjac[ij])/fjold)); + } + } + } /* end debugging */ + + } else { /* dside > 2 */ + /* COMPUTE THE TWO-SIDED DERIVATIVE */ + for (i=0; i<m; i++) { + wa2[i] = wa[i]; + } + + /* Evaluate at x - h */ + x[ifree[j]] = temp - h; + iflag = mp_call(funct, m, npar, x, wa, 0, priv); + if (nfev) *nfev = *nfev + 1; + if (iflag < 0 ) goto DONE; + x[ifree[j]] = temp; + + /* Now compute derivative as (f(x+h) - f(x-h))/(2h) */ + if (! debug ) { + /* Non-debug path for speed */ + for (i=0; i<m; i++, ij++) { + fjac[ij] = (fjac[ij] - wa[i])/(2*h); /* fjac[i+m*j] */ + } + } else { + /* Debug path for correctness */ + for (i=0; i<m; i++, ij++) { + double fjold = fjac[ij]; + fjac[ij] = (wa2[i] - wa[i])/(2*h); /* fjac[i+m*j] */ + if ((da == 0 && dr == 0 && (fjold != 0 || fjac[ij] != 0)) || + ((da != 0 || dr != 0) && (fabs(fjold-fjac[ij]) > da + fabs(fjold)*dr))) { + printf(" %10d %10.4g %10.4g %10.4g %10.4g %10.4g\n", + i, fvec[i], fjold, fjac[ij], fjold-fjac[ij], + (fjold == 0)?(0):((fjold-fjac[ij])/fjold)); + } + } + } /* end debugging */ + + } /* if (dside > 2) */ + } /* if (has_numerical_derivative) */ + + if (has_debug_deriv) { + printf("FJAC DEBUG END\n"); + } + + DONE: + if (iflag < 0) return iflag; + return 0; + /* + * last card of subroutine fdjac2. + */ +} + + +/************************qrfac.c*************************/ + +static +void mp_qrfac(int m, int n, double *a, int lda, + int pivot, int *ipvt, int lipvt, + double *rdiag, double *acnorm, double *wa) +{ +/* +* ********** +* +* subroutine qrfac +* +* this subroutine uses householder transformations with column +* pivoting (optional) to compute a qr factorization of the +* m by n matrix a. that is, qrfac determines an orthogonal +* matrix q, a permutation matrix p, and an upper trapezoidal +* matrix r with diagonal elements of nonincreasing magnitude, +* such that a*p = q*r. the householder transformation for +* column k, k = 1,2,...,min(m,n), is of the form +* +* t +* i - (1/u(k))*u*u +* +* where u has zeros in the first k-1 positions. the form of +* this transformation and the method of pivoting first +* appeared in the corresponding linpack subroutine. +* +* the subroutine statement is +* +* subroutine qrfac(m,n,a,lda,pivot,ipvt,lipvt,rdiag,acnorm,wa) +* +* where +* +* m is a positive integer input variable set to the number +* of rows of a. +* +* n is a positive integer input variable set to the number +* of columns of a. +* +* a is an m by n array. on input a contains the matrix for +* which the qr factorization is to be computed. on output +* the strict upper trapezoidal part of a contains the strict +* upper trapezoidal part of r, and the lower trapezoidal +* part of a contains a factored form of q (the non-trivial +* elements of the u vectors described above). +* +* lda is a positive integer input variable not less than m +* which specifies the leading dimension of the array a. +* +* pivot is a logical input variable. if pivot is set true, +* then column pivoting is enforced. if pivot is set false, +* then no column pivoting is done. +* +* ipvt is an integer output array of length lipvt. ipvt +* defines the permutation matrix p such that a*p = q*r. +* column j of p is column ipvt(j) of the identity matrix. +* if pivot is false, ipvt is not referenced. +* +* lipvt is a positive integer input variable. if pivot is false, +* then lipvt may be as small as 1. if pivot is true, then +* lipvt must be at least n. +* +* rdiag is an output array of length n which contains the +* diagonal elements of r. +* +* acnorm is an output array of length n which contains the +* norms of the corresponding columns of the input matrix a. +* if this information is not needed, then acnorm can coincide +* with rdiag. +* +* wa is a work array of length n. if pivot is false, then wa +* can coincide with rdiag. +* +* subprograms called +* +* minpack-supplied ... dpmpar,enorm +* +* fortran-supplied ... dmax1,dsqrt,min0 +* +* argonne national laboratory. minpack project. march 1980. +* burton s. garbow, kenneth e. hillstrom, jorge j. more +* +* ********** +*/ + int i,ij,jj,j,jp1,k,kmax,minmn; + double ajnorm,sum,temp; + static double zero = 0.0; + static double one = 1.0; + static double p05 = 0.05; + + lda = 0; /* Prevent compiler warning */ + lipvt = 0; /* Prevent compiler warning */ + if (lda) {} /* Prevent compiler warning */ + if (lipvt) {} /* Prevent compiler warning */ + + /* + * compute the initial column norms and initialize several arrays. + */ + ij = 0; + for (j=0; j<n; j++) { + acnorm[j] = mp_enorm(m,&a[ij]); + rdiag[j] = acnorm[j]; + wa[j] = rdiag[j]; + if (pivot != 0) + ipvt[j] = j; + ij += m; /* m*j */ + } + /* + * reduce a to r with householder transformations. + */ + minmn = mp_min0(m,n); + for (j=0; j<minmn; j++) { + if (pivot == 0) + goto L40; + /* + * bring the column of largest norm into the pivot position. + */ + kmax = j; + for (k=j; k<n; k++) + { + if (rdiag[k] > rdiag[kmax]) + kmax = k; + } + if (kmax == j) + goto L40; + + ij = m * j; + jj = m * kmax; + for (i=0; i<m; i++) + { + temp = a[ij]; /* [i+m*j] */ + a[ij] = a[jj]; /* [i+m*kmax] */ + a[jj] = temp; + ij += 1; + jj += 1; + } + rdiag[kmax] = rdiag[j]; + wa[kmax] = wa[j]; + k = ipvt[j]; + ipvt[j] = ipvt[kmax]; + ipvt[kmax] = k; + + L40: + /* + * compute the householder transformation to reduce the + * j-th column of a to a multiple of the j-th unit vector. + */ + jj = j + m*j; + ajnorm = mp_enorm(m-j,&a[jj]); + if (ajnorm == zero) + goto L100; + if (a[jj] < zero) + ajnorm = -ajnorm; + ij = jj; + for (i=j; i<m; i++) + { + a[ij] /= ajnorm; + ij += 1; /* [i+m*j] */ + } + a[jj] += one; + /* + * apply the transformation to the remaining columns + * and update the norms. + */ + jp1 = j + 1; + if (jp1 < n) + { + for (k=jp1; k<n; k++) + { + sum = zero; + ij = j + m*k; + jj = j + m*j; + for (i=j; i<m; i++) + { + sum += a[jj]*a[ij]; + ij += 1; /* [i+m*k] */ + jj += 1; /* [i+m*j] */ + } + temp = sum/a[j+m*j]; + ij = j + m*k; + jj = j + m*j; + for (i=j; i<m; i++) + { + a[ij] -= temp*a[jj]; + ij += 1; /* [i+m*k] */ + jj += 1; /* [i+m*j] */ + } + if ((pivot != 0) && (rdiag[k] != zero)) + { + temp = a[j+m*k]/rdiag[k]; + temp = mp_dmax1( zero, one-temp*temp ); + rdiag[k] *= sqrt(temp); + temp = rdiag[k]/wa[k]; + if ((p05*temp*temp) <= MP_MACHEP0) + { + rdiag[k] = mp_enorm(m-j-1,&a[jp1+m*k]); + wa[k] = rdiag[k]; + } + } + } + } + + L100: + rdiag[j] = -ajnorm; + } + /* + * last card of subroutine qrfac. + */ +} + +/************************qrsolv.c*************************/ + +static +void mp_qrsolv(int n, double *r, int ldr, int *ipvt, double *diag, + double *qtb, double *x, double *sdiag, double *wa) +{ +/* +* ********** +* +* subroutine qrsolv +* +* given an m by n matrix a, an n by n diagonal matrix d, +* and an m-vector b, the problem is to determine an x which +* solves the system +* +* a*x = b , d*x = 0 , +* +* in the least squares sense. +* +* this subroutine completes the solution of the problem +* if it is provided with the necessary information from the +* qr factorization, with column pivoting, of a. that is, if +* a*p = q*r, where p is a permutation matrix, q has orthogonal +* columns, and r is an upper triangular matrix with diagonal +* elements of nonincreasing magnitude, then qrsolv expects +* the full upper triangle of r, the permutation matrix p, +* and the first n components of (q transpose)*b. the system +* a*x = b, d*x = 0, is then equivalent to +* +* t t +* r*z = q *b , p *d*p*z = 0 , +* +* where x = p*z. if this system does not have full rank, +* then a least squares solution is obtained. on output qrsolv +* also provides an upper triangular matrix s such that +* +* t t t +* p *(a *a + d*d)*p = s *s . +* +* s is computed within qrsolv and may be of separate interest. +* +* the subroutine statement is +* +* subroutine qrsolv(n,r,ldr,ipvt,diag,qtb,x,sdiag,wa) +* +* where +* +* n is a positive integer input variable set to the order of r. +* +* r is an n by n array. on input the full upper triangle +* must contain the full upper triangle of the matrix r. +* on output the full upper triangle is unaltered, and the +* strict lower triangle contains the strict upper triangle +* (transposed) of the upper triangular matrix s. +* +* ldr is a positive integer input variable not less than n +* which specifies the leading dimension of the array r. +* +* ipvt is an integer input array of length n which defines the +* permutation matrix p such that a*p = q*r. column j of p +* is column ipvt(j) of the identity matrix. +* +* diag is an input array of length n which must contain the +* diagonal elements of the matrix d. +* +* qtb is an input array of length n which must contain the first +* n elements of the vector (q transpose)*b. +* +* x is an output array of length n which contains the least +* squares solution of the system a*x = b, d*x = 0. +* +* sdiag is an output array of length n which contains the +* diagonal elements of the upper triangular matrix s. +* +* wa is a work array of length n. +* +* subprograms called +* +* fortran-supplied ... dabs,dsqrt +* +* argonne national laboratory. minpack project. march 1980. +* burton s. garbow, kenneth e. hillstrom, jorge j. more +* +* ********** +*/ + int i,ij,ik,kk,j,jp1,k,kp1,l,nsing; + double cosx,cotan,qtbpj,sinx,sum,tanx,temp; + static double zero = 0.0; + static double p25 = 0.25; + static double p5 = 0.5; + + /* + * copy r and (q transpose)*b to preserve input and initialize s. + * in particular, save the diagonal elements of r in x. + */ + kk = 0; + for (j=0; j<n; j++) { + ij = kk; + ik = kk; + for (i=j; i<n; i++) + { + r[ij] = r[ik]; + ij += 1; /* [i+ldr*j] */ + ik += ldr; /* [j+ldr*i] */ + } + x[j] = r[kk]; + wa[j] = qtb[j]; + kk += ldr+1; /* j+ldr*j */ + } + + /* + * eliminate the diagonal matrix d using a givens rotation. + */ + for (j=0; j<n; j++) { + /* + * prepare the row of d to be eliminated, locating the + * diagonal element using p from the qr factorization. + */ + l = ipvt[j]; + if (diag[l] == zero) + goto L90; + for (k=j; k<n; k++) + sdiag[k] = zero; + sdiag[j] = diag[l]; + /* + * the transformations to eliminate the row of d + * modify only a single element of (q transpose)*b + * beyond the first n, which is initially zero. + */ + qtbpj = zero; + for (k=j; k<n; k++) + { + /* + * determine a givens rotation which eliminates the + * appropriate element in the current row of d. + */ + if (sdiag[k] == zero) + continue; + kk = k + ldr * k; + if (fabs(r[kk]) < fabs(sdiag[k])) + { + cotan = r[kk]/sdiag[k]; + sinx = p5/sqrt(p25+p25*cotan*cotan); + cosx = sinx*cotan; + } + else + { + tanx = sdiag[k]/r[kk]; + cosx = p5/sqrt(p25+p25*tanx*tanx); + sinx = cosx*tanx; + } + /* + * compute the modified diagonal element of r and + * the modified element of ((q transpose)*b,0). + */ + r[kk] = cosx*r[kk] + sinx*sdiag[k]; + temp = cosx*wa[k] + sinx*qtbpj; + qtbpj = -sinx*wa[k] + cosx*qtbpj; + wa[k] = temp; + /* + * accumulate the tranformation in the row of s. + */ + kp1 = k + 1; + if (n > kp1) + { + ik = kk + 1; + for (i=kp1; i<n; i++) + { + temp = cosx*r[ik] + sinx*sdiag[i]; + sdiag[i] = -sinx*r[ik] + cosx*sdiag[i]; + r[ik] = temp; + ik += 1; /* [i+ldr*k] */ + } + } + } + L90: + /* + * store the diagonal element of s and restore + * the corresponding diagonal element of r. + */ + kk = j + ldr*j; + sdiag[j] = r[kk]; + r[kk] = x[j]; + } + /* + * solve the triangular system for z. if the system is + * singular, then obtain a least squares solution. + */ + nsing = n; + for (j=0; j<n; j++) { + if ((sdiag[j] == zero) && (nsing == n)) + nsing = j; + if (nsing < n) + wa[j] = zero; + } + if (nsing < 1) + goto L150; + + for (k=0; k<nsing; k++) { + j = nsing - k - 1; + sum = zero; + jp1 = j + 1; + if (nsing > jp1) + { + ij = jp1 + ldr * j; + for (i=jp1; i<nsing; i++) + { + sum += r[ij]*wa[i]; + ij += 1; /* [i+ldr*j] */ + } + } + wa[j] = (wa[j] - sum)/sdiag[j]; + } + L150: + /* + * permute the components of z back to components of x. + */ + for (j=0; j<n; j++) { + l = ipvt[j]; + x[l] = wa[j]; + } + /* + * last card of subroutine qrsolv. + */ +} + +/************************lmpar.c*************************/ + +static +void mp_lmpar(int n, double *r, int ldr, int *ipvt, int *ifree, double *diag, + double *qtb, double delta, double *par, double *x, + double *sdiag, double *wa1, double *wa2) +{ + /* ********** + * + * subroutine lmpar + * + * given an m by n matrix a, an n by n nonsingular diagonal + * matrix d, an m-vector b, and a positive number delta, + * the problem is to determine a value for the parameter + * par such that if x solves the system + * + * a*x = b , sqrt(par)*d*x = 0 , + * + * in the least squares sense, and dxnorm is the euclidean + * norm of d*x, then either par is zero and + * + * (dxnorm-delta) .le. 0.1*delta , + * + * or par is positive and + * + * abs(dxnorm-delta) .le. 0.1*delta . + * + * this subroutine completes the solution of the problem + * if it is provided with the necessary information from the + * qr factorization, with column pivoting, of a. that is, if + * a*p = q*r, where p is a permutation matrix, q has orthogonal + * columns, and r is an upper triangular matrix with diagonal + * elements of nonincreasing magnitude, then lmpar expects + * the full upper triangle of r, the permutation matrix p, + * and the first n components of (q transpose)*b. on output + * lmpar also provides an upper triangular matrix s such that + * + * t t t + * p *(a *a + par*d*d)*p = s *s . + * + * s is employed within lmpar and may be of separate interest. + * + * only a few iterations are generally needed for convergence + * of the algorithm. if, however, the limit of 10 iterations + * is reached, then the output par will contain the best + * value obtained so far. + * + * the subroutine statement is + * + * subroutine lmpar(n,r,ldr,ipvt,diag,qtb,delta,par,x,sdiag, + * wa1,wa2) + * + * where + * + * n is a positive integer input variable set to the order of r. + * + * r is an n by n array. on input the full upper triangle + * must contain the full upper triangle of the matrix r. + * on output the full upper triangle is unaltered, and the + * strict lower triangle contains the strict upper triangle + * (transposed) of the upper triangular matrix s. + * + * ldr is a positive integer input variable not less than n + * which specifies the leading dimension of the array r. + * + * ipvt is an integer input array of length n which defines the + * permutation matrix p such that a*p = q*r. column j of p + * is column ipvt(j) of the identity matrix. + * + * diag is an input array of length n which must contain the + * diagonal elements of the matrix d. + * + * qtb is an input array of length n which must contain the first + * n elements of the vector (q transpose)*b. + * + * delta is a positive input variable which specifies an upper + * bound on the euclidean norm of d*x. + * + * par is a nonnegative variable. on input par contains an + * initial estimate of the levenberg-marquardt parameter. + * on output par contains the final estimate. + * + * x is an output array of length n which contains the least + * squares solution of the system a*x = b, sqrt(par)*d*x = 0, + * for the output par. + * + * sdiag is an output array of length n which contains the + * diagonal elements of the upper triangular matrix s. + * + * wa1 and wa2 are work arrays of length n. + * + * subprograms called + * + * minpack-supplied ... dpmpar,mp_enorm,qrsolv + * + * fortran-supplied ... dabs,mp_dmax1,dmin1,dsqrt + * + * argonne national laboratory. minpack project. march 1980. + * burton s. garbow, kenneth e. hillstrom, jorge j. more + * + * ********** + */ + int i,iter,ij,jj,j,jm1,jp1,k,l,nsing; + double dxnorm,fp,gnorm,parc,parl,paru; + double sum,temp; + static double zero = 0.0; + /* static double one = 1.0; */ + static double p1 = 0.1; + static double p001 = 0.001; + + /* + * compute and store in x the gauss-newton direction. if the + * jacobian is rank-deficient, obtain a least squares solution. + */ + nsing = n; + jj = 0; + for (j=0; j<n; j++) { + wa1[j] = qtb[j]; + if ((r[jj] == zero) && (nsing == n)) + nsing = j; + if (nsing < n) + wa1[j] = zero; + jj += ldr+1; /* [j+ldr*j] */ + } + + if (nsing >= 1) { + for (k=0; k<nsing; k++) + { + j = nsing - k - 1; + wa1[j] = wa1[j]/r[j+ldr*j]; + temp = wa1[j]; + jm1 = j - 1; + if (jm1 >= 0) + { + ij = ldr * j; + for (i=0; i<=jm1; i++) + { + wa1[i] -= r[ij]*temp; + ij += 1; + } + } + } + } + + for (j=0; j<n; j++) { + l = ipvt[j]; + x[l] = wa1[j]; + } + /* + * initialize the iteration counter. + * evaluate the function at the origin, and test + * for acceptance of the gauss-newton direction. + */ + iter = 0; + for (j=0; j<n; j++) + wa2[j] = diag[ifree[j]]*x[j]; + dxnorm = mp_enorm(n,wa2); + fp = dxnorm - delta; + if (fp <= p1*delta) { + goto L220; + } + /* + * if the jacobian is not rank deficient, the newton + * step provides a lower bound, parl, for the zero of + * the function. otherwise set this bound to zero. + */ + parl = zero; + if (nsing >= n) { + for (j=0; j<n; j++) + { + l = ipvt[j]; + wa1[j] = diag[ifree[l]]*(wa2[l]/dxnorm); + } + jj = 0; + for (j=0; j<n; j++) + { + sum = zero; + jm1 = j - 1; + if (jm1 >= 0) + { + ij = jj; + for (i=0; i<=jm1; i++) + { + sum += r[ij]*wa1[i]; + ij += 1; + } + } + wa1[j] = (wa1[j] - sum)/r[j+ldr*j]; + jj += ldr; /* [i+ldr*j] */ + } + temp = mp_enorm(n,wa1); + parl = ((fp/delta)/temp)/temp; + } + /* + * calculate an upper bound, paru, for the zero of the function. + */ + jj = 0; + for (j=0; j<n; j++) { + sum = zero; + ij = jj; + for (i=0; i<=j; i++) + { + sum += r[ij]*qtb[i]; + ij += 1; + } + l = ipvt[j]; + wa1[j] = sum/diag[ifree[l]]; + jj += ldr; /* [i+ldr*j] */ + } + gnorm = mp_enorm(n,wa1); + paru = gnorm/delta; + if (paru == zero) + paru = MP_DWARF/mp_dmin1(delta,p1); + /* + * if the input par lies outside of the interval (parl,paru), + * set par to the closer endpoint. + */ + *par = mp_dmax1( *par,parl); + *par = mp_dmin1( *par,paru); + if (*par == zero) + *par = gnorm/dxnorm; + + /* + * beginning of an iteration. + */ + L150: + iter += 1; + /* + * evaluate the function at the current value of par. + */ + if (*par == zero) + *par = mp_dmax1(MP_DWARF,p001*paru); + temp = sqrt( *par ); + for (j=0; j<n; j++) + wa1[j] = temp*diag[ifree[j]]; + mp_qrsolv(n,r,ldr,ipvt,wa1,qtb,x,sdiag,wa2); + for (j=0; j<n; j++) + wa2[j] = diag[ifree[j]]*x[j]; + dxnorm = mp_enorm(n,wa2); + temp = fp; + fp = dxnorm - delta; + /* + * if the function is small enough, accept the current value + * of par. also test for the exceptional cases where parl + * is zero or the number of iterations has reached 10. + */ + if ((fabs(fp) <= p1*delta) + || ((parl == zero) && (fp <= temp) && (temp < zero)) + || (iter == 10)) + goto L220; + /* + * compute the newton correction. + */ + for (j=0; j<n; j++) { + l = ipvt[j]; + wa1[j] = diag[ifree[l]]*(wa2[l]/dxnorm); + } + jj = 0; + for (j=0; j<n; j++) { + wa1[j] = wa1[j]/sdiag[j]; + temp = wa1[j]; + jp1 = j + 1; + if (jp1 < n) + { + ij = jp1 + jj; + for (i=jp1; i<n; i++) + { + wa1[i] -= r[ij]*temp; + ij += 1; /* [i+ldr*j] */ + } + } + jj += ldr; /* ldr*j */ + } + temp = mp_enorm(n,wa1); + parc = ((fp/delta)/temp)/temp; + /* + * depending on the sign of the function, update parl or paru. + */ + if (fp > zero) + parl = mp_dmax1(parl, *par); + if (fp < zero) + paru = mp_dmin1(paru, *par); + /* + * compute an improved estimate for par. + */ + *par = mp_dmax1(parl, *par + parc); + /* + * end of an iteration. + */ + goto L150; + + L220: + /* + * termination. + */ + if (iter == 0) + *par = zero; + /* + * last card of subroutine lmpar. + */ +} + + +/************************enorm.c*************************/ + +static +double mp_enorm(int n, double *x) +{ + /* + * ********** + * + * function enorm + * + * given an n-vector x, this function calculates the + * euclidean norm of x. + * + * the euclidean norm is computed by accumulating the sum of + * squares in three different sums. the sums of squares for the + * small and large components are scaled so that no overflows + * occur. non-destructive underflows are permitted. underflows + * and overflows do not occur in the computation of the unscaled + * sum of squares for the intermediate components. + * the definitions of small, intermediate and large components + * depend on two constants, rdwarf and rgiant. the main + * restrictions on these constants are that rdwarf**2 not + * underflow and rgiant**2 not overflow. the constants + * given here are suitable for every known computer. + * + * the function statement is + * + * double precision function enorm(n,x) + * + * where + * + * n is a positive integer input variable. + * + * x is an input array of length n. + * + * subprograms called + * + * fortran-supplied ... dabs,dsqrt + * + * argonne national laboratory. minpack project. march 1980. + * burton s. garbow, kenneth e. hillstrom, jorge j. more + * + * ********** + */ + int i; + double agiant,floatn,s1,s2,s3,xabs,x1max,x3max; + double ans, temp; + double rdwarf = MP_RDWARF; + double rgiant = MP_RGIANT; + static double zero = 0.0; + static double one = 1.0; + + s1 = zero; + s2 = zero; + s3 = zero; + x1max = zero; + x3max = zero; + floatn = n; + agiant = rgiant/floatn; + + for (i=0; i<n; i++) { + xabs = fabs(x[i]); + if ((xabs > rdwarf) && (xabs < agiant)) + { + /* + * sum for intermediate components. + */ + s2 += xabs*xabs; + continue; + } + + if (xabs > rdwarf) + { + /* + * sum for large components. + */ + if (xabs > x1max) + { + temp = x1max/xabs; + s1 = one + s1*temp*temp; + x1max = xabs; + } + else + { + temp = xabs/x1max; + s1 += temp*temp; + } + continue; + } + /* + * sum for small components. + */ + if (xabs > x3max) + { + temp = x3max/xabs; + s3 = one + s3*temp*temp; + x3max = xabs; + } + else + { + if (xabs != zero) + { + temp = xabs/x3max; + s3 += temp*temp; + } + } + } + /* + * calculation of norm. + */ + if (s1 != zero) { + temp = s1 + (s2/x1max)/x1max; + ans = x1max*sqrt(temp); + return(ans); + } + if (s2 != zero) { + if (s2 >= x3max) + temp = s2*(one+(x3max/s2)*(x3max*s3)); + else + temp = x3max*((s2/x3max)+(x3max*s3)); + ans = sqrt(temp); + } + else + { + ans = x3max*sqrt(s3); + } + return(ans); + /* + * last card of function enorm. + */ +} + +/************************lmmisc.c*************************/ + +static +double mp_dmax1(double a, double b) +{ + if (a >= b) + return(a); + else + return(b); +} + +static +double mp_dmin1(double a, double b) +{ + if (a <= b) + return(a); + else + return(b); +} + +static +int mp_min0(int a, int b) +{ + if (a <= b) + return(a); + else + return(b); +} + +/************************covar.c*************************/ +/* +c ********** +c +c subroutine covar +c +c given an m by n matrix a, the problem is to determine +c the covariance matrix corresponding to a, defined as +c +c t +c inverse(a *a) . +c +c this subroutine completes the solution of the problem +c if it is provided with the necessary information from the +c qr factorization, with column pivoting, of a. that is, if +c a*p = q*r, where p is a permutation matrix, q has orthogonal +c columns, and r is an upper triangular matrix with diagonal +c elements of nonincreasing magnitude, then covar expects +c the full upper triangle of r and the permutation matrix p. +c the covariance matrix is then computed as +c +c t t +c p*inverse(r *r)*p . +c +c if a is nearly rank deficient, it may be desirable to compute +c the covariance matrix corresponding to the linearly independent +c columns of a. to define the numerical rank of a, covar uses +c the tolerance tol. if l is the largest integer such that +c +c abs(r(l,l)) .gt. tol*abs(r(1,1)) , +c +c then covar computes the covariance matrix corresponding to +c the first l columns of r. for k greater than l, column +c and row ipvt(k) of the covariance matrix are set to zero. +c +c the subroutine statement is +c +c subroutine covar(n,r,ldr,ipvt,tol,wa) +c +c where +c +c n is a positive integer input variable set to the order of r. +c +c r is an n by n array. on input the full upper triangle must +c contain the full upper triangle of the matrix r. on output +c r contains the square symmetric covariance matrix. +c +c ldr is a positive integer input variable not less than n +c which specifies the leading dimension of the array r. +c +c ipvt is an integer input array of length n which defines the +c permutation matrix p such that a*p = q*r. column j of p +c is column ipvt(j) of the identity matrix. +c +c tol is a nonnegative input variable used to define the +c numerical rank of a in the manner described above. +c +c wa is a work array of length n. +c +c subprograms called +c +c fortran-supplied ... dabs +c +c argonne national laboratory. minpack project. august 1980. +c burton s. garbow, kenneth e. hillstrom, jorge j. more +c +c ********** +*/ + +static +int mp_covar(int n, double *r, int ldr, int *ipvt, double tol, double *wa) +{ + int i, ii, j, jj, k, l; + int kk, kj, ji, j0, k0, jj0; + int sing; + double one = 1.0, temp, tolr, zero = 0.0; + + /* + * form the inverse of r in the full upper triangle of r. + */ + +#if 0 + for (j=0; j<n; j++) { + for (i=0; i<n; i++) { + printf("%f ", r[j*ldr+i]); + } + printf("\n"); + } +#endif + + tolr = tol*fabs(r[0]); + l = -1; + for (k=0; k<n; k++) { + kk = k*ldr + k; + if (fabs(r[kk]) <= tolr) break; + + r[kk] = one/r[kk]; + for (j=0; j<k; j++) { + kj = k*ldr + j; + temp = r[kk] * r[kj]; + r[kj] = zero; + + k0 = k*ldr; j0 = j*ldr; + for (i=0; i<=j; i++) { + r[k0+i] += (-temp*r[j0+i]); + } + } + l = k; + } + + /* + * Form the full upper triangle of the inverse of (r transpose)*r + * in the full upper triangle of r + */ + + if (l >= 0) { + for (k=0; k <= l; k++) { + k0 = k*ldr; + + for (j=0; j<k; j++) { + temp = r[k*ldr+j]; + + j0 = j*ldr; + for (i=0; i<=j; i++) { + r[j0+i] += temp*r[k0+i]; + } + } + + temp = r[k0+k]; + for (i=0; i<=k; i++) { + r[k0+i] *= temp; + } + } + } + + /* + * For the full lower triangle of the covariance matrix + * in the strict lower triangle or and in wa + */ + for (j=0; j<n; j++) { + jj = ipvt[j]; + sing = (j > l); + j0 = j*ldr; + jj0 = jj*ldr; + for (i=0; i<=j; i++) { + ji = j0+i; + + if (sing) r[ji] = zero; + ii = ipvt[i]; + if (ii > jj) r[jj0+ii] = r[ji]; + if (ii < jj) r[ii*ldr+jj] = r[ji]; + } + wa[jj] = r[j0+j]; + } + + /* + * Symmetrize the covariance matrix in r + */ + for (j=0; j<n; j++) { + j0 = j*ldr; + for (i=0; i<j; i++) { + r[j0+i] = r[i*ldr+j]; + } + r[j0+j] = wa[j]; + } + +#if 0 + for (j=0; j<n; j++) { + for (i=0; i<n; i++) { + printf("%f ", r[j*ldr+i]); + } + printf("\n"); + } +#endif + + return 0; +}
http://git-wip-us.apache.org/repos/asf/impala/blob/0936e329/be/src/thirdparty/mpfit/mpfit.h ---------------------------------------------------------------------- diff --git a/be/src/thirdparty/mpfit/mpfit.h b/be/src/thirdparty/mpfit/mpfit.h new file mode 100644 index 0000000..47469c8 --- /dev/null +++ b/be/src/thirdparty/mpfit/mpfit.h @@ -0,0 +1,198 @@ +/* + * MINPACK-1 Least Squares Fitting Library + * + * Original public domain version by B. Garbow, K. Hillstrom, J. More' + * (Argonne National Laboratory, MINPACK project, March 1980) + * + * Tranlation to C Language by S. Moshier (moshier.net) + * + * Enhancements and packaging by C. Markwardt + * (comparable to IDL fitting routine MPFIT + * see http://cow.physics.wisc.edu/~craigm/idl/idl.html) + */ + +/* Header file defining constants, data structures and functions of + mpfit library + $Id: mpfit.h,v 1.16 2016/06/02 19:14:16 craigm Exp $ +*/ + +#ifndef MPFIT_H +#define MPFIT_H + +/* This is a C library. Allow compilation with a C++ compiler */ +#ifdef __cplusplus +extern "C" { +#endif + +/* MPFIT version string */ +#define MPFIT_VERSION "1.3" + +/* Definition of a parameter constraint structure */ +struct mp_par_struct { + int fixed; /* 1 = fixed; 0 = free */ + int limited[2]; /* 1 = low/upper limit; 0 = no limit */ + double limits[2]; /* lower/upper limit boundary value */ + + char *parname; /* Name of parameter, or 0 for none */ + double step; /* Step size for finite difference */ + double relstep; /* Relative step size for finite difference */ + int side; /* Sidedness of finite difference derivative + 0 - one-sided derivative computed automatically + 1 - one-sided derivative (f(x+h) - f(x) )/h + -1 - one-sided derivative (f(x) - f(x-h))/h + 2 - two-sided derivative (f(x+h) - f(x-h))/(2*h) + 3 - user-computed analytical derivatives + */ + int deriv_debug; /* Derivative debug mode: 1 = Yes; 0 = No; + + If yes, compute both analytical and numerical + derivatives and print them to the console for + comparison. + + NOTE: when debugging, do *not* set side = 3, + but rather to the kind of numerical derivative + you want to compare the user-analytical one to + (0, 1, -1, or 2). + */ + double deriv_reltol; /* Relative tolerance for derivative debug + printout */ + double deriv_abstol; /* Absolute tolerance for derivative debug + printout */ +}; + +/* Just a placeholder - do not use!! */ +typedef void (*mp_iterproc)(void); + +/* Definition of MPFIT configuration structure */ +struct mp_config_struct { + /* NOTE: the user may set the value explicitly; OR, if the passed + value is zero, then the "Default" value will be substituted by + mpfit(). */ + double ftol; /* Relative chi-square convergence criterium Default: 1e-10 */ + double xtol; /* Relative parameter convergence criterium Default: 1e-10 */ + double gtol; /* Orthogonality convergence criterium Default: 1e-10 */ + double epsfcn; /* Finite derivative step size Default: MP_MACHEP0 */ + double stepfactor; /* Initial step bound Default: 100.0 */ + double covtol; /* Range tolerance for covariance calculation Default: 1e-14 */ + int maxiter; /* Maximum number of iterations. If maxiter == MP_NO_ITER, + then basic error checking is done, and parameter + errors/covariances are estimated based on input + parameter values, but no fitting iterations are done. + Default: 200 + */ +#define MP_NO_ITER (-1) /* No iterations, just checking */ + int maxfev; /* Maximum number of function evaluations, or 0 for no limit + Default: 0 (no limit) */ + int nprint; /* Default: 1 */ + int douserscale;/* Scale variables by user values? + 1 = yes, user scale values in diag; + 0 = no, variables scaled internally (Default) */ + int nofinitecheck; /* Disable check for infinite quantities from user? + 0 = do not perform check (Default) + 1 = perform check + */ + mp_iterproc iterproc; /* Placeholder pointer - must set to 0 */ + +}; + +/* Definition of results structure, for when fit completes */ +struct mp_result_struct { + double bestnorm; /* Final chi^2 */ + double orignorm; /* Starting value of chi^2 */ + int niter; /* Number of iterations */ + int nfev; /* Number of function evaluations */ + int status; /* Fitting status code */ + + int npar; /* Total number of parameters */ + int nfree; /* Number of free parameters */ + int npegged; /* Number of pegged parameters */ + int nfunc; /* Number of residuals (= num. of data points) */ + + double *resid; /* Final residuals + nfunc-vector, or 0 if not desired */ + double *xerror; /* Final parameter uncertainties (1-sigma) + npar-vector, or 0 if not desired */ + double *covar; /* Final parameter covariance matrix + npar x npar array, or 0 if not desired */ + char version[20]; /* MPFIT version string */ +}; + +/* Convenience typedefs */ +typedef struct mp_par_struct mp_par; +typedef struct mp_config_struct mp_config; +typedef struct mp_result_struct mp_result; + +/* Enforce type of fitting function */ +typedef int (*mp_func)(int m, /* Number of functions (elts of fvec) */ + int n, /* Number of variables (elts of x) */ + double *x, /* I - Parameters */ + double *fvec, /* O - function values */ + double **dvec, /* O - function derivatives (optional)*/ + void *private_data); /* I/O - function private data*/ + +/* Error codes */ +#define MP_ERR_INPUT (0) /* General input parameter error */ +#define MP_ERR_NAN (-16) /* User function produced non-finite values */ +#define MP_ERR_FUNC (-17) /* No user function was supplied */ +#define MP_ERR_NPOINTS (-18) /* No user data points were supplied */ +#define MP_ERR_NFREE (-19) /* No free parameters */ +#define MP_ERR_MEMORY (-20) /* Memory allocation error */ +#define MP_ERR_INITBOUNDS (-21) /* Initial values inconsistent w constraints*/ +#define MP_ERR_BOUNDS (-22) /* Initial constraints inconsistent */ +#define MP_ERR_PARAM (-23) /* General input parameter error */ +#define MP_ERR_DOF (-24) /* Not enough degrees of freedom */ + +/* Potential success status codes */ +#define MP_OK_CHI (1) /* Convergence in chi-square value */ +#define MP_OK_PAR (2) /* Convergence in parameter value */ +#define MP_OK_BOTH (3) /* Both MP_OK_PAR and MP_OK_CHI hold */ +#define MP_OK_DIR (4) /* Convergence in orthogonality */ +#define MP_MAXITER (5) /* Maximum number of iterations reached */ +#define MP_FTOL (6) /* ftol is too small; no further improvement*/ +#define MP_XTOL (7) /* xtol is too small; no further improvement*/ +#define MP_GTOL (8) /* gtol is too small; no further improvement*/ + +/* Double precision numeric constants */ +#define MP_MACHEP0 2.2204460e-16 +#define MP_DWARF 2.2250739e-308 +#define MP_GIANT 1.7976931e+308 + +#if 0 +/* Float precision */ +#define MP_MACHEP0 1.19209e-07 +#define MP_DWARF 1.17549e-38 +#define MP_GIANT 3.40282e+38 +#endif + +#define MP_RDWARF (sqrt(MP_DWARF*1.5)*10) +#define MP_RGIANT (sqrt(MP_GIANT)*0.1) + + +/* External function prototype declarations */ +extern int mpfit(mp_func funct, int m, int npar, + double *xall, mp_par *pars, mp_config *config, + void *private_data, + mp_result *result); + + + +/* C99 uses isfinite() instead of finite() */ +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L +#define mpfinite(x) isfinite(x) + +/* Microsoft C uses _finite(x) instead of finite(x) */ +#elif defined(_MSC_VER) && _MSC_VER +#include <float.h> +#define mpfinite(x) _finite(x) + +/* Default is to assume that compiler/library has finite() function */ +#else +#define mpfinite(x) finite(x) + +#endif + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* MPFIT_H */ http://git-wip-us.apache.org/repos/asf/impala/blob/0936e329/be/src/util/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/be/src/util/CMakeLists.txt b/be/src/util/CMakeLists.txt index 08002ed..c9e0f3c 100644 --- a/be/src/util/CMakeLists.txt +++ b/be/src/util/CMakeLists.txt @@ -17,6 +17,7 @@ set(SQUEASEL_SRC_DIR "${CMAKE_SOURCE_DIR}/be/src/thirdparty/squeasel") set(MUSTACHE_SRC_DIR "${CMAKE_SOURCE_DIR}/be/src/thirdparty/mustache") +set(MPFIT_SRC_DIR "${CMAKE_SOURCE_DIR}/be/src/thirdparty/mpfit") # where to put generated libraries set(LIBRARY_OUTPUT_PATH "${BUILD_OUTPUT_ROOT_DIRECTORY}/util") @@ -58,6 +59,7 @@ add_library(Util min-max-filter.cc min-max-filter-ir.cc minidump.cc + mpfit-util.cc network-util.cc openssl-util.cc os-info.cc @@ -86,6 +88,7 @@ add_library(Util ${SQUEASEL_SRC_DIR}/squeasel.c webserver.cc ${MUSTACHE_SRC_DIR}/mustache.cc + ${MPFIT_SRC_DIR}/mpfit.c ) add_dependencies(Util gen-deps gen_ir_descriptions) http://git-wip-us.apache.org/repos/asf/impala/blob/0936e329/be/src/util/mpfit-util.cc ---------------------------------------------------------------------- diff --git a/be/src/util/mpfit-util.cc b/be/src/util/mpfit-util.cc new file mode 100644 index 0000000..0f001ad --- /dev/null +++ b/be/src/util/mpfit-util.cc @@ -0,0 +1,82 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "util/mpfit-util.h" + +#include <cstring> + +using namespace impala; +using std::string; +using std::function; + +ObjectiveFunction::ObjectiveFunction(string name, int num_params, + function<double (double, const double*)> fn) + : name_(name), num_params_(num_params), fn_(fn), num_points_(0), xs_(nullptr), + ys_(nullptr) { + DCHECK_GT(num_params, 0); +} + +/// User-supplied function called by MpFit during curve fitting. MpFit requires this +/// user-supplied function to compute "dy = y - fn(x)" for every known data point given +/// the objective function 'fn' and the current list of parameter values 'params'. +/// Declared here so it can be used in ObjectiveFunction below. +/// 'num_points' is the number of data points for fitting +/// 'num_params' is the number of parameters in the objective function +/// 'params' is an input array with 'num_params' parameter values +/// 'dy' is an output parameter allocated by MpFit. It contains the "delta y" for each +/// data point. This function sets dy[i] = y[i] - fn(x[i]) for each data point i and +/// for the objective function 'fn'. +/// 'obj_func' points to an ObjectiveFunction which contains the objective function +/// and the data points +/// The 'dvec' parameter is not important for our purposes. See the MpFit documentation. +/// MpFit allows these user-supplied functions to indicate errors by returning a non-zero +/// value. Our objective functions are simple and we always return 0. +static int ComputeDeltaY(int num_points, int num_params, double* params, double* dy, + double** dvec, void* obj_func); + +bool ObjectiveFunction::LmsFit(const double* xs, const double* ys, int num_points) { + DCHECK(xs != nullptr); + DCHECK(ys != nullptr); + DCHECK_GE(num_points, 0); + num_points_ = num_points; + xs_ = xs; + ys_ = ys; + + // Initialize function parameters. + params_.reset(new (std::nothrow) double[num_params_]); + if (params_ == nullptr) return false; + for (int i = 0; i < num_params_; ++i) params_.get()[i] = 1.0; + + struct mp_config_struct config; + memset(&config, 0, sizeof(config)); + // Maximum number of calls to SampledNdvMpFit(). Value determined empirically. + config.maxfev = 1000; + // Maximum number of fitting iterations. Value determined empirically. + config.maxiter = 200; + memset(&result_, 0, sizeof(result_)); + int ret = mpfit(ComputeDeltaY, num_points, num_params_, params_.get(), nullptr, + &config, this, &result_); + // Any positive integer indicates success. + return ret > 0; +} + +int ComputeDeltaY(int num_points, int num_params, double* params, double* dy, + double** dvec, void* obj_func) { + ObjectiveFunction* fn = reinterpret_cast<ObjectiveFunction*>(obj_func); + for (int i = 0; i < num_points; ++i) dy[i] = fn->GetDeltaY(i, params); + return 0; +} http://git-wip-us.apache.org/repos/asf/impala/blob/0936e329/be/src/util/mpfit-util.h ---------------------------------------------------------------------- diff --git a/be/src/util/mpfit-util.h b/be/src/util/mpfit-util.h new file mode 100644 index 0000000..9eac3c2 --- /dev/null +++ b/be/src/util/mpfit-util.h @@ -0,0 +1,98 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + + +#ifndef IMPALA_MPFIT_UTIL_H +#define IMPALA_MPFIT_UTIL_H + +#include <functional> +#include <memory> +#include <string> + +#include "common/logging.h" +#include "common/status.h" +#include "thirdparty/mpfit/mpfit.h" + +namespace impala { + +/// Objective function to be fit using the MpFit library. An objective function has +/// exactly one 'x' variable and any number of additional non-variable parameters whose +/// values are unknown. The purpose of curve fitting is to find the best values for those +/// parameters given an objective function and a series of x/y data points. +/// This class contains the objective function as well as the data points because that's +/// the most natural setup for the MpFit API. +/// Calling LmsFit() determines the parameters for the objective function. +/// Calling GetY() computes the 'y' value for a given 'x' using the function parameters. +/// The objective function is of type function<double (double, const double*)> +/// By convention, the first argument is the value of the 'x' variable and the second +/// argument is an array of function parameters which are determined during fitting. +class ObjectiveFunction { + public: + ObjectiveFunction(std::string name, int num_params, + std::function<double (double, const double*)> fn); + + /// Performs least mean squares (LMS) curve fitting using the MpFit library + /// against the provided x/y data points. + /// Returns true if fitting was successful, false otherwise. + bool LmsFit(const double* xs, const double* ys, int num_points) WARN_UNUSED_RESULT; + + /// Evaluates the objective function over the given 'x' value. + double GetY(int64_t x) const { + DCHECK(params_ != nullptr); + return fn_(x, params_.get()); + } + + /// Returns the difference between the y value of data point 'pidx' and the + /// y value of the objective function with the given parameters over the x value + /// of the same point. + double GetDeltaY(int pidx, const double* params) const { + DCHECK_LT(pidx, num_points_); + return ys_[pidx] - fn_(xs_[pidx], params); + } + + /// Returns the Chi-Square of fitting. This is an indication of how well the function + /// fits. Lower is better. Valid to call after LmsFit(). + double GetError() const { + DCHECK(params_ != nullptr); + return result_.bestnorm; + } + + private: + /// Human-readable name of this function. Used for debugging. + std::string name_; + + /// Function parameters to be determined by fitting. + const int num_params_; + std::unique_ptr<double> params_; + + /// MPFit result structure. Populated by in LmsFit(). All pointers in this structure + /// are optional and must be allocated and owned by the caller of mpfit(). Passing + /// nullptr indicates to MPFit that those fields should not be populated. + mp_result result_; + + /// Objective function whose parameters should be fit to the data points. + std::function<double (double, const double*)> fn_; + + /// Known x/y data points. Memory not owned. + int num_points_; + const double* xs_; + const double* ys_; +}; + +} + +#endif http://git-wip-us.apache.org/repos/asf/impala/blob/0936e329/bin/rat_exclude_files.txt ---------------------------------------------------------------------- diff --git a/bin/rat_exclude_files.txt b/bin/rat_exclude_files.txt index 73cd073..fdb1926 100644 --- a/bin/rat_exclude_files.txt +++ b/bin/rat_exclude_files.txt @@ -19,8 +19,9 @@ testdata/__init__.py tests/__init__.py www/index.html -# See LICENSE.txt +# See $IMPALA_HOME/LICENSE.txt be/src/gutil/* +be/src/thirdparty/mpfit/* be/src/kudu/gutil www/highlight/* www/DataTables*/* http://git-wip-us.apache.org/repos/asf/impala/blob/0936e329/bin/run_clang_tidy.sh ---------------------------------------------------------------------- diff --git a/bin/run_clang_tidy.sh b/bin/run_clang_tidy.sh index ae389a9..e879b35 100755 --- a/bin/run_clang_tidy.sh +++ b/bin/run_clang_tidy.sh @@ -35,7 +35,10 @@ then echo "WARNING: compile failed" >&2 fi -DIRS=$(ls -d "${IMPALA_HOME}/be/src/"*/ | grep -v gutil | grep -v kudu | tr '\n' ' ') +DIRS=$(ls -d "${IMPALA_HOME}/be/src/"*/ | grep -v gutil | grep -v kudu |\ + grep -v thirdparty | tr '\n' ' ') +# Include/exclude select thirdparty dirs. +DIRS=$DIRS$(ls -d "${IMPALA_HOME}/be/src/thirdparty/"*/ | grep -v mpfit | tr '\n' ' ') PIPE_DIRS=$(echo "${DIRS}" | tr ' ' '|') # Reduce the concurrency to one less than the number of cores in the system. Note than http://git-wip-us.apache.org/repos/asf/impala/blob/0936e329/fe/src/main/java/org/apache/impala/analysis/FunctionCallExpr.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/analysis/FunctionCallExpr.java b/fe/src/main/java/org/apache/impala/analysis/FunctionCallExpr.java index 9574a02..12a34f7 100644 --- a/fe/src/main/java/org/apache/impala/analysis/FunctionCallExpr.java +++ b/fe/src/main/java/org/apache/impala/analysis/FunctionCallExpr.java @@ -469,8 +469,6 @@ public class FunctionCallExpr extends Expr { return; } - Type[] argTypes = collectChildReturnTypes(); - // User needs DB access. Db db = analyzer.getDb(fnName_.getDb(), Privilege.VIEW_METADATA, true); if (!db.containsFunction(fnName_.getFunction())) { @@ -482,8 +480,7 @@ public class FunctionCallExpr extends Expr { // There is no version of COUNT() that takes more than 1 argument but after // the rewrite, we only need count(*). // TODO: fix how we rewrite count distinct. - argTypes = new Type[0]; - Function searchDesc = new Function(fnName_, argTypes, Type.INVALID, false); + Function searchDesc = new Function(fnName_, new Type[0], Type.INVALID, false); fn_ = db.getFunction(searchDesc, Function.CompareMode.IS_NONSTRICT_SUPERTYPE_OF); type_ = fn_.getReturnType(); // Make sure BE doesn't see any TYPE_NULL exprs @@ -506,6 +503,28 @@ public class FunctionCallExpr extends Expr { "AVG requires a numeric or timestamp parameter: " + toSql()); } + // SAMPLED_NDV() is only valid with two children. Invocations with an invalid number + // of children are gracefully handled when resolving the function signature. + if (fnName_.getFunction().equalsIgnoreCase("sampled_ndv") + && children_.size() == 2) { + if (!(children_.get(1) instanceof NumericLiteral)) { + throw new AnalysisException( + "Second parameter of SAMPLED_NDV() must be a numeric literal in [0,1]: " + + children_.get(1).toSql()); + } + NumericLiteral samplePerc = (NumericLiteral) children_.get(1); + if (samplePerc.getDoubleValue() < 0 || samplePerc.getDoubleValue() > 1.0) { + throw new AnalysisException( + "Second parameter of SAMPLED_NDV() must be a numeric literal in [0,1]: " + + samplePerc.toSql()); + } + // Numeric literals with a decimal point are analyzed as decimals. Without this + // cast we might resolve to the wrong function because there is no exactly + // matching signature with decimal as the second argument. + children_.set(1, samplePerc.uncheckedCastTo(Type.DOUBLE)); + } + + Type[] argTypes = collectChildReturnTypes(); Function searchDesc = new Function(fnName_, argTypes, Type.INVALID, false); fn_ = db.getFunction(searchDesc, Function.CompareMode.IS_NONSTRICT_SUPERTYPE_OF); if (fn_ == null || (!isInternalFnCall_ && !fn_.userVisible())) {
