http://git-wip-us.apache.org/repos/asf/impala/blob/0936e329/be/src/thirdparty/mpfit/mpfit.c
----------------------------------------------------------------------
diff --git a/be/src/thirdparty/mpfit/mpfit.c b/be/src/thirdparty/mpfit/mpfit.c
new file mode 100644
index 0000000..1ab9abb
--- /dev/null
+++ b/be/src/thirdparty/mpfit/mpfit.c
@@ -0,0 +1,2305 @@
+/* 
+ * MINPACK-1 Least Squares Fitting Library
+ *
+ * Original public domain version by B. Garbow, K. Hillstrom, J. More'
+ *   (Argonne National Laboratory, MINPACK project, March 1980)
+ * See the file DISCLAIMER for copyright information.
+ * 
+ * Tranlation to C Language by S. Moshier (moshier.net)
+ * 
+ * Enhancements and packaging by C. Markwardt
+ *   (comparable to IDL fitting routine MPFIT
+ *    see http://cow.physics.wisc.edu/~craigm/idl/idl.html)
+ */
+
+/* Main mpfit library routines (double precision) 
+   $Id: mpfit.c,v 1.24 2013/04/23 18:37:38 craigm Exp $
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <string.h>
+#include "mpfit.h"
+
+/* Forward declarations of functions in this module */
+static int mp_fdjac2(mp_func funct,
+             int m, int n, int *ifree, int npar, double *x, double *fvec,
+             double *fjac, int ldfjac, double epsfcn,
+             double *wa, void *priv, int *nfev,
+             double *step, double *dstep, int *dside,
+             int *qulimited, double *ulimit,
+             int *ddebug, double *ddrtol, double *ddatol,
+             double *wa2, double **dvecptr);
+static void mp_qrfac(int m, int n, double *a, int lda, 
+             int pivot, int *ipvt, int lipvt,
+             double *rdiag, double *acnorm, double *wa);
+static void mp_qrsolv(int n, double *r, int ldr, int *ipvt, double *diag,
+              double *qtb, double *x, double *sdiag, double *wa);
+static void mp_lmpar(int n, double *r, int ldr, int *ipvt, int *ifree, double 
*diag,
+             double *qtb, double delta, double *par, double *x,
+             double *sdiag, double *wa1, double *wa2);
+static double mp_enorm(int n, double *x);
+static double mp_dmax1(double a, double b);
+static double mp_dmin1(double a, double b);
+static int mp_min0(int a, int b);
+static int mp_covar(int n, double *r, int ldr, int *ipvt, double tol, double 
*wa);
+
+/* Macro to call user function */
+#define mp_call(funct, m, n, x, fvec, dvec, priv) 
(*(funct))(m,n,x,fvec,dvec,priv)
+
+/* Macro to safely allocate memory */
+#define mp_malloc(dest,type,size) \
+  dest = (type *) malloc( sizeof(type)*size ); \
+  if (dest == 0) { \
+    info = MP_ERR_MEMORY; \
+    goto CLEANUP; \
+  } else { \
+    int _k; \
+    for (_k=0; _k<(size); _k++) dest[_k] = 0; \
+  } 
+
+/*
+*     **********
+*
+*     subroutine mpfit
+*
+*     the purpose of mpfit is to minimize the sum of the squares of
+*     m nonlinear functions in n variables by a modification of
+*     the levenberg-marquardt algorithm. the user must provide a
+*     subroutine which calculates the functions. the jacobian is
+*     then calculated by a finite-difference approximation.
+*
+*     mp_funct funct - function to be minimized
+*     int m          - number of data points
+*     int npar       - number of fit parameters
+*     double *xall   - array of n initial parameter values
+*                      upon return, contains adjusted parameter values
+*     mp_par *pars   - array of npar structures specifying constraints;
+*                      or 0 (null pointer) for unconstrained fitting
+*                      [ see README and mpfit.h for definition & use of mp_par]
+*     mp_config *config - pointer to structure which specifies the
+*                      configuration of mpfit(); or 0 (null pointer)
+*                      if the default configuration is to be used.
+*                      See README and mpfit.h for definition and use
+*                      of config.
+*     void *private  - any private user data which is to be passed directly
+*                      to funct without modification by mpfit().
+*     mp_result *result - pointer to structure, which upon return, contains
+*                      the results of the fit.  The user should zero this
+*                      structure.  If any of the array values are to be 
+*                      returned, the user should allocate storage for them
+*                      and assign the corresponding pointer in *result.
+*                      Upon return, *result will be updated, and
+*                      any of the non-null arrays will be filled.
+*
+*
+* FORTRAN DOCUMENTATION BELOW
+*
+*
+*     the subroutine statement is
+*
+*      subroutine lmdif(fcn,m,n,x,fvec,ftol,xtol,gtol,maxfev,epsfcn,
+*                       diag,mode,factor,nprint,info,nfev,fjac,
+*                       ldfjac,ipvt,qtf,wa1,wa2,wa3,wa4)
+*
+*     where
+*
+*      fcn is the name of the user-supplied subroutine which
+*        calculates the functions. fcn must be declared
+*        in an external statement in the user calling
+*        program, and should be written as follows.
+*
+*        subroutine fcn(m,n,x,fvec,iflag)
+*        integer m,n,iflag
+*        double precision x(n),fvec(m)
+*        ----------
+*        calculate the functions at x and
+*        return this vector in fvec.
+*        ----------
+*        return
+*        end
+*
+*        the value of iflag should not be changed by fcn unless
+*        the user wants to terminate execution of lmdif.
+*        in this case set iflag to a negative integer.
+*
+*      m is a positive integer input variable set to the number
+*        of functions.
+*
+*      n is a positive integer input variable set to the number
+*        of variables. n must not exceed m.
+*
+*      x is an array of length n. on input x must contain
+*        an initial estimate of the solution vector. on output x
+*        contains the final estimate of the solution vector.
+*
+*      fvec is an output array of length m which contains
+*        the functions evaluated at the output x.
+*
+*      ftol is a nonnegative input variable. termination
+*        occurs when both the actual and predicted relative
+*        reductions in the sum of squares are at most ftol.
+*        therefore, ftol measures the relative error desired
+*        in the sum of squares.
+*
+*      xtol is a nonnegative input variable. termination
+*        occurs when the relative error between two consecutive
+*        iterates is at most xtol. therefore, xtol measures the
+*        relative error desired in the approximate solution.
+*
+*      gtol is a nonnegative input variable. termination
+*        occurs when the cosine of the angle between fvec and
+*        any column of the jacobian is at most gtol in absolute
+*        value. therefore, gtol measures the orthogonality
+*        desired between the function vector and the columns
+*        of the jacobian.
+*
+*      maxfev is a positive integer input variable. termination
+*        occurs when the number of calls to fcn is at least
+*        maxfev by the end of an iteration.
+*
+*      epsfcn is an input variable used in determining a suitable
+*        step length for the forward-difference approximation. this
+*        approximation assumes that the relative errors in the
+*        functions are of the order of epsfcn. if epsfcn is less
+*        than the machine precision, it is assumed that the relative
+*        errors in the functions are of the order of the machine
+*        precision.
+*
+*      diag is an array of length n. if mode = 1 (see
+*        below), diag is internally set. if mode = 2, diag
+*        must contain positive entries that serve as
+*        multiplicative scale factors for the variables.
+*
+*      mode is an integer input variable. if mode = 1, the
+*        variables will be scaled internally. if mode = 2,
+*        the scaling is specified by the input diag. other
+*        values of mode are equivalent to mode = 1.
+*
+*      factor is a positive input variable used in determining the
+*        initial step bound. this bound is set to the product of
+*        factor and the euclidean norm of diag*x if nonzero, or else
+*        to factor itself. in most cases factor should lie in the
+*        interval (.1,100.). 100. is a generally recommended value.
+*
+*      nprint is an integer input variable that enables controlled
+*        printing of iterates if it is positive. in this case,
+*        fcn is called with iflag = 0 at the beginning of the first
+*        iteration and every nprint iterations thereafter and
+*        immediately prior to return, with x and fvec available
+*        for printing. if nprint is not positive, no special calls
+*        of fcn with iflag = 0 are made.
+*
+*      info is an integer output variable. if the user has
+*        terminated execution, info is set to the (negative)
+*        value of iflag. see description of fcn. otherwise,
+*        info is set as follows.
+*
+*        info = 0  improper input parameters.
+*
+*        info = 1  both actual and predicted relative reductions
+*                  in the sum of squares are at most ftol.
+*
+*        info = 2  relative error between two consecutive iterates
+*                  is at most xtol.
+*
+*        info = 3  conditions for info = 1 and info = 2 both hold.
+*
+*        info = 4  the cosine of the angle between fvec and any
+*                  column of the jacobian is at most gtol in
+*                  absolute value.
+*
+*        info = 5  number of calls to fcn has reached or
+*                  exceeded maxfev.
+*
+*        info = 6  ftol is too small. no further reduction in
+*                  the sum of squares is possible.
+*
+*        info = 7  xtol is too small. no further improvement in
+*                  the approximate solution x is possible.
+*
+*        info = 8  gtol is too small. fvec is orthogonal to the
+*                  columns of the jacobian to machine precision.
+*
+*      nfev is an integer output variable set to the number of
+*        calls to fcn.
+*
+*      fjac is an output m by n array. the upper n by n submatrix
+*        of fjac contains an upper triangular matrix r with
+*        diagonal elements of nonincreasing magnitude such that
+*
+*               t     t           t
+*              p *(jac *jac)*p = r *r,
+*
+*        where p is a permutation matrix and jac is the final
+*        calculated jacobian. column j of p is column ipvt(j)
+*        (see below) of the identity matrix. the lower trapezoidal
+*        part of fjac contains information generated during
+*        the computation of r.
+*
+*      ldfjac is a positive integer input variable not less than m
+*        which specifies the leading dimension of the array fjac.
+*
+*      ipvt is an integer output array of length n. ipvt
+*        defines a permutation matrix p such that jac*p = q*r,
+*        where jac is the final calculated jacobian, q is
+*        orthogonal (not stored), and r is upper triangular
+*        with diagonal elements of nonincreasing magnitude.
+*        column j of p is column ipvt(j) of the identity matrix.
+*
+*      qtf is an output array of length n which contains
+*        the first n elements of the vector (q transpose)*fvec.
+*
+*      wa1, wa2, and wa3 are work arrays of length n.
+*
+*      wa4 is a work array of length m.
+*
+*     subprograms called
+*
+*      user-supplied ...... fcn
+*
+*      minpack-supplied ... dpmpar,enorm,fdjac2,lmpar,qrfac
+*
+*      fortran-supplied ... dabs,dmax1,dmin1,dsqrt,mod
+*
+*     argonne national laboratory. minpack project. march 1980.
+*     burton s. garbow, kenneth e. hillstrom, jorge j. more
+*
+* ********** */
+
+
+int mpfit(mp_func funct, int m, int npar,
+         double *xall, mp_par *pars, mp_config *config, void *private_data, 
+         mp_result *result)
+{
+  mp_config conf;
+  int i, j, info, iflag, nfree, npegged, iter;
+  int qanylim = 0;
+
+  int ij,jj,l;
+  double actred,delta,dirder,fnorm,fnorm1,gnorm, orignorm;
+  double par,pnorm,prered,ratio;
+  double sum,temp,temp1,temp2,temp3,xnorm, alpha;
+  static double one = 1.0;
+  static double p1 = 0.1;
+  static double p5 = 0.5;
+  static double p25 = 0.25;
+  static double p75 = 0.75;
+  static double p0001 = 1.0e-4;
+  static double zero = 0.0;
+  int nfev = 0;
+
+  double *step = 0, *dstep = 0, *llim = 0, *ulim = 0;
+  int *pfixed = 0, *mpside = 0, *ifree = 0, *qllim = 0, *qulim = 0;
+  int *ddebug = 0;
+  double *ddrtol = 0, *ddatol = 0;
+
+  double *fvec = 0, *qtf = 0;
+  double *x = 0, *xnew = 0, *fjac = 0, *diag = 0;
+  double *wa1 = 0, *wa2 = 0, *wa3 = 0, *wa4 = 0;
+  double **dvecptr = 0;
+  int *ipvt = 0;
+
+  int ldfjac;
+
+  /* Default configuration */
+  conf.ftol = 1e-10;
+  conf.xtol = 1e-10;
+  conf.gtol = 1e-10;
+  conf.stepfactor = 100.0;
+  conf.nprint = 1;
+  conf.epsfcn = MP_MACHEP0;
+  conf.maxiter = 200;
+  conf.douserscale = 0;
+  conf.maxfev = 0;
+  conf.covtol = 1e-14;
+  conf.nofinitecheck = 0;
+  
+  if (config) {
+    /* Transfer any user-specified configurations */
+    if (config->ftol > 0) conf.ftol = config->ftol;
+    if (config->xtol > 0) conf.xtol = config->xtol;
+    if (config->gtol > 0) conf.gtol = config->gtol;
+    if (config->stepfactor > 0) conf.stepfactor = config->stepfactor;
+    if (config->nprint >= 0) conf.nprint = config->nprint;
+    if (config->epsfcn > 0) conf.epsfcn = config->epsfcn;
+    if (config->maxiter > 0) conf.maxiter = config->maxiter;
+    if (config->maxiter == MP_NO_ITER) conf.maxiter = 0;
+    if (config->douserscale != 0) conf.douserscale = config->douserscale;
+    if (config->covtol > 0) conf.covtol = config->covtol;
+    if (config->nofinitecheck > 0) conf.nofinitecheck = config->nofinitecheck;
+    conf.maxfev = config->maxfev;
+  }
+
+  info = MP_ERR_INPUT; /* = 0 */
+  iflag = 0;
+  nfree = 0;
+  npegged = 0;
+
+  /* Basic error checking */
+  if (funct == 0) {
+    return MP_ERR_FUNC;
+  }
+
+  if ((m <= 0) || (xall == 0)) {
+    return MP_ERR_NPOINTS;
+  }
+  
+  if (npar <= 0) {
+    return MP_ERR_NFREE;
+  }
+
+  fnorm = -1.0;
+  fnorm1 = -1.0;
+  xnorm = -1.0;
+  delta = 0.0;
+
+  /* FIXED parameters? */
+  mp_malloc(pfixed, int, npar);
+  if (pars) for (i=0; i<npar; i++) {
+    pfixed[i] = (pars[i].fixed)?1:0;
+  }
+
+  /* Finite differencing step, absolute and relative, and sidedness of deriv */
+  mp_malloc(step,  double, npar);
+  mp_malloc(dstep, double, npar);
+  mp_malloc(mpside, int, npar);
+  mp_malloc(ddebug, int, npar);
+  mp_malloc(ddrtol, double, npar);
+  mp_malloc(ddatol, double, npar);
+  if (pars) for (i=0; i<npar; i++) {
+    step[i] = pars[i].step;
+    dstep[i] = pars[i].relstep;
+    mpside[i] = pars[i].side;
+    ddebug[i] = pars[i].deriv_debug;
+    ddrtol[i] = pars[i].deriv_reltol;
+    ddatol[i] = pars[i].deriv_abstol;
+  }
+    
+  /* Finish up the free parameters */
+  nfree = 0;
+  mp_malloc(ifree, int, npar);
+  for (i=0, j=0; i<npar; i++) {
+    if (pfixed[i] == 0) {
+      nfree++;
+      ifree[j++] = i;
+    }
+  }
+  if (nfree == 0) {
+    info = MP_ERR_NFREE;
+    goto CLEANUP;
+  }
+  
+  if (pars) {
+    for (i=0; i<npar; i++) {
+      if ( (pars[i].limited[0] && (xall[i] < pars[i].limits[0])) ||
+          (pars[i].limited[1] && (xall[i] > pars[i].limits[1])) ) {
+       info = MP_ERR_INITBOUNDS;
+       goto CLEANUP;
+      }
+      if ( (pars[i].fixed == 0) && pars[i].limited[0] && pars[i].limited[1] &&
+          (pars[i].limits[0] >= pars[i].limits[1])) {
+       info = MP_ERR_BOUNDS;
+       goto CLEANUP;
+      }
+    }
+
+    mp_malloc(qulim, int, nfree);
+    mp_malloc(qllim, int, nfree);
+    mp_malloc(ulim, double, nfree);
+    mp_malloc(llim, double, nfree);
+
+    for (i=0; i<nfree; i++) {
+      qllim[i] = pars[ifree[i]].limited[0];
+      qulim[i] = pars[ifree[i]].limited[1];
+      llim[i]  = pars[ifree[i]].limits[0];
+      ulim[i]  = pars[ifree[i]].limits[1];
+      if (qllim[i] || qulim[i]) qanylim = 1;
+    }
+  }
+
+  /* Sanity checking on input configuration */
+  if ((npar <= 0) || (conf.ftol <= 0) || (conf.xtol <= 0) ||
+      (conf.gtol <= 0) || (conf.maxiter < 0) ||
+      (conf.stepfactor <= 0)) {
+    info = MP_ERR_PARAM;
+    goto CLEANUP;
+  }
+
+  /* Ensure there are some degrees of freedom */
+  if (m < nfree) {
+    info = MP_ERR_DOF;
+    goto CLEANUP;
+  }
+
+  /* Allocate temporary storage */
+  mp_malloc(fvec, double, m);
+  mp_malloc(qtf, double, nfree);
+  mp_malloc(x, double, nfree);
+  mp_malloc(xnew, double, npar);
+  mp_malloc(fjac, double, m*nfree);
+  ldfjac = m;
+  mp_malloc(diag, double, npar);
+  mp_malloc(wa1, double, npar);
+  mp_malloc(wa2, double, npar);
+  mp_malloc(wa3, double, npar);
+  mp_malloc(wa4, double, m);
+  mp_malloc(ipvt, int, npar);
+  mp_malloc(dvecptr, double *, npar);
+
+  /* Evaluate user function with initial parameter values */
+  iflag = mp_call(funct, m, npar, xall, fvec, 0, private_data);
+  nfev += 1;
+  if (iflag < 0) {
+    goto CLEANUP;
+  }
+
+  fnorm = mp_enorm(m, fvec);
+  orignorm = fnorm*fnorm;
+
+  /* Make a new copy */
+  for (i=0; i<npar; i++) {
+    xnew[i] = xall[i];
+  }
+
+  /* Transfer free parameters to 'x' */
+  for (i=0; i<nfree; i++) {
+    x[i] = xall[ifree[i]];
+  }
+
+  /* Initialize Levelberg-Marquardt parameter and iteration counter */
+
+  par = 0.0;
+  iter = 1;
+  for (i=0; i<nfree; i++) {
+    qtf[i] = 0;
+  }
+
+  /* Beginning of the outer loop */
+ OUTER_LOOP:
+  for (i=0; i<nfree; i++) {
+    xnew[ifree[i]] = x[i];
+  }
+  
+  /* XXX call iterproc */
+
+  /* Calculate the jacobian matrix */
+  iflag = mp_fdjac2(funct, m, nfree, ifree, npar, xnew, fvec, fjac, ldfjac,
+                   conf.epsfcn, wa4, private_data, &nfev,
+                   step, dstep, mpside, qulim, ulim,
+                   ddebug, ddrtol, ddatol, wa2, dvecptr);
+  if (iflag < 0) {
+    goto CLEANUP;
+  }
+
+  /* Determine if any of the parameters are pegged at the limits */
+  if (qanylim) {
+    for (j=0; j<nfree; j++) {
+      int lpegged = (qllim[j] && (x[j] == llim[j]));
+      int upegged = (qulim[j] && (x[j] == ulim[j]));
+      sum = 0;
+
+      /* If the parameter is pegged at a limit, compute the gradient
+        direction */
+      if (lpegged || upegged) {
+       ij = j*ldfjac;
+       for (i=0; i<m; i++, ij++) {
+         sum += fvec[i] * fjac[ij];
+       }
+      }
+      /* If pegged at lower limit and gradient is toward negative then
+        reset gradient to zero */
+      if (lpegged && (sum > 0)) {
+       ij = j*ldfjac;
+       for (i=0; i<m; i++, ij++) fjac[ij] = 0;
+      }
+      /* If pegged at upper limit and gradient is toward positive then
+        reset gradient to zero */
+      if (upegged && (sum < 0)) {
+       ij = j*ldfjac;
+       for (i=0; i<m; i++, ij++) fjac[ij] = 0;
+      }
+    }
+  } 
+
+  /* Compute the QR factorization of the jacobian */
+  mp_qrfac(m,nfree,fjac,ldfjac,1,ipvt,nfree,wa1,wa2,wa3);
+
+  /*
+   *    on the first iteration and if mode is 1, scale according
+   *    to the norms of the columns of the initial jacobian.
+   */
+  if (iter == 1) {
+    if (conf.douserscale == 0) {
+      for (j=0; j<nfree; j++) {
+       diag[ifree[j]] = wa2[j];
+       if (wa2[j] == zero ) {
+         diag[ifree[j]] = one;
+       }
+      }
+    }
+
+    /*
+     *  on the first iteration, calculate the norm of the scaled x
+     *  and initialize the step bound delta.
+     */
+    for (j=0; j<nfree; j++ ) {
+      wa3[j] = diag[ifree[j]] * x[j];
+    }
+    
+    xnorm = mp_enorm(nfree, wa3);
+    delta = conf.stepfactor*xnorm;
+    if (delta == zero) delta = conf.stepfactor;
+  }
+
+  /*
+   *    form (q transpose)*fvec and store the first n components in
+   *    qtf.
+   */
+  for (i=0; i<m; i++ ) {
+    wa4[i] = fvec[i];
+  }
+
+  jj = 0;
+  for (j=0; j<nfree; j++ ) {
+    temp3 = fjac[jj];
+    if (temp3 != zero) {
+      sum = zero;
+      ij = jj;
+      for (i=j; i<m; i++ ) {
+       sum += fjac[ij] * wa4[i];
+       ij += 1;        /* fjac[i+m*j] */
+      }
+      temp = -sum / temp3;
+      ij = jj;
+      for (i=j; i<m; i++ ) {
+       wa4[i] += fjac[ij] * temp;
+       ij += 1;        /* fjac[i+m*j] */
+      }
+    }
+    fjac[jj] = wa1[j];
+    jj += m+1; /* fjac[j+m*j] */
+    qtf[j] = wa4[j];
+  }
+
+  /* ( From this point on, only the square matrix, consisting of the
+     triangle of R, is needed.) */
+
+  
+  if (conf.nofinitecheck) {
+    /* Check for overflow.  This should be a cheap test here since FJAC
+       has been reduced to a (small) square matrix, and the test is
+       O(N^2). */
+    int off = 0, nonfinite = 0;
+
+    for (j=0; j<nfree; j++) {
+      for (i=0; i<nfree; i++) {
+       if (mpfinite(fjac[off+i]) == 0) nonfinite = 1;
+      }
+      off += ldfjac;
+    }
+
+    if (nonfinite) {
+      info = MP_ERR_NAN;
+      goto CLEANUP;
+    }
+  }
+
+
+  /*
+   *    compute the norm of the scaled gradient.
+   */
+  gnorm = zero;
+  if (fnorm != zero) {
+    jj = 0;
+    for (j=0; j<nfree; j++ ) {
+      l = ipvt[j];
+      if (wa2[l] != zero) {
+       sum = zero;
+       ij = jj;
+       for (i=0; i<=j; i++ ) {
+         sum += fjac[ij]*(qtf[i]/fnorm);
+         ij += 1; /* fjac[i+m*j] */
+       }
+       gnorm = mp_dmax1(gnorm,fabs(sum/wa2[l]));
+      }
+      jj += m;
+    }
+  }
+
+  /*
+   *    test for convergence of the gradient norm.
+   */
+  if (gnorm <= conf.gtol) info = MP_OK_DIR;
+  if (info != 0) goto L300;
+  if (conf.maxiter == 0) {
+    info = MP_MAXITER;
+    goto L300;
+  }
+
+  /*
+   *    rescale if necessary.
+   */
+  if (conf.douserscale == 0) {
+    for (j=0; j<nfree; j++ ) {
+      diag[ifree[j]] = mp_dmax1(diag[ifree[j]],wa2[j]);
+    }
+  }
+
+  /*
+   *    beginning of the inner loop.
+   */
+ L200:
+  /*
+   *       determine the levenberg-marquardt parameter.
+   */
+  mp_lmpar(nfree,fjac,ldfjac,ipvt,ifree,diag,qtf,delta,&par,wa1,wa2,wa3,wa4);
+  /*
+   *       store the direction p and x + p. calculate the norm of p.
+   */
+  for (j=0; j<nfree; j++ ) {
+    wa1[j] = -wa1[j];
+  }
+
+  alpha = 1.0;
+  if (qanylim == 0) {
+    /* No parameter limits, so just move to new position WA2 */
+    for (j=0; j<nfree; j++ ) {
+      wa2[j] = x[j] + wa1[j];
+    }
+
+  } else {
+    /* Respect the limits.  If a step were to go out of bounds, then 
+     * we should take a step in the same direction but shorter distance.
+     * The step should take us right to the limit in that case.
+     */
+    for (j=0; j<nfree; j++) {
+      int lpegged = (qllim[j] && (x[j] <= llim[j]));
+      int upegged = (qulim[j] && (x[j] >= ulim[j]));
+      int dwa1 = fabs(wa1[j]) > MP_MACHEP0;
+      
+      if (lpegged && (wa1[j] < 0)) wa1[j] = 0;
+      if (upegged && (wa1[j] > 0)) wa1[j] = 0;
+
+      if (dwa1 && qllim[j] && ((x[j] + wa1[j]) < llim[j])) {
+       alpha = mp_dmin1(alpha, (llim[j]-x[j])/wa1[j]);
+      }
+      if (dwa1 && qulim[j] && ((x[j] + wa1[j]) > ulim[j])) {
+       alpha = mp_dmin1(alpha, (ulim[j]-x[j])/wa1[j]);
+      }
+    }
+    
+    /* Scale the resulting vector, advance to the next position */
+    for (j=0; j<nfree; j++) {
+      double sgnu, sgnl;
+      double ulim1, llim1;
+
+      wa1[j] = wa1[j] * alpha;
+      wa2[j] = x[j] + wa1[j];
+
+      /* Adjust the output values.  If the step put us exactly
+       * on a boundary, make sure it is exact.
+       */
+      sgnu = (ulim[j] >= 0) ? (+1) : (-1);
+      sgnl = (llim[j] >= 0) ? (+1) : (-1);
+      ulim1 = ulim[j]*(1-sgnu*MP_MACHEP0) - ((ulim[j] == 0)?(MP_MACHEP0):0);
+      llim1 = llim[j]*(1+sgnl*MP_MACHEP0) + ((llim[j] == 0)?(MP_MACHEP0):0);
+
+      if (qulim[j] && (wa2[j] >= ulim1)) {
+       wa2[j] = ulim[j];
+      }
+      if (qllim[j] && (wa2[j] <= llim1)) {
+       wa2[j] = llim[j];
+      }
+    }
+
+  }
+
+  for (j=0; j<nfree; j++ ) {
+    wa3[j] = diag[ifree[j]]*wa1[j];
+  }
+
+  pnorm = mp_enorm(nfree,wa3);
+  
+  /*
+   *       on the first iteration, adjust the initial step bound.
+   */
+  if (iter == 1) {
+    delta = mp_dmin1(delta,pnorm);
+  }
+
+  /*
+   *       evaluate the function at x + p and calculate its norm.
+   */
+  for (i=0; i<nfree; i++) {
+    xnew[ifree[i]] = wa2[i];
+  }
+
+  iflag = mp_call(funct, m, npar, xnew, wa4, 0, private_data);
+  nfev += 1;
+  if (iflag < 0) goto L300;
+
+  fnorm1 = mp_enorm(m,wa4);
+
+  /*
+   *       compute the scaled actual reduction.
+   */
+  actred = -one;
+  if ((p1*fnorm1) < fnorm) {
+    temp = fnorm1/fnorm;
+    actred = one - temp * temp;
+  }
+
+  /*
+   *       compute the scaled predicted reduction and
+   *       the scaled directional derivative.
+   */
+  jj = 0;
+  for (j=0; j<nfree; j++ ) {
+    wa3[j] = zero;
+    l = ipvt[j];
+    temp = wa1[l];
+    ij = jj;
+    for (i=0; i<=j; i++ ) {
+      wa3[i] += fjac[ij]*temp;
+      ij += 1; /* fjac[i+m*j] */
+    }
+    jj += m;
+  }
+
+  /* Remember, alpha is the fraction of the full LM step actually
+   * taken
+   */
+
+  temp1 = mp_enorm(nfree,wa3)*alpha/fnorm;
+  temp2 = (sqrt(alpha*par)*pnorm)/fnorm;
+  prered = temp1*temp1 + (temp2*temp2)/p5;
+  dirder = -(temp1*temp1 + temp2*temp2);
+
+  /*
+   *       compute the ratio of the actual to the predicted
+   *       reduction.
+   */
+  ratio = zero;
+  if (prered != zero) {
+    ratio = actred/prered;
+  }
+
+  /*
+   *       update the step bound.
+   */
+  
+  if (ratio <= p25) {
+    if (actred >= zero) {
+      temp = p5; 
+    } else {
+      temp = p5*dirder/(dirder + p5*actred);
+    }
+    if (((p1*fnorm1) >= fnorm)
+       || (temp < p1) ) {
+      temp = p1;
+    }
+    delta = temp*mp_dmin1(delta,pnorm/p1);
+    par = par/temp;
+  } else {
+    if ((par == zero) || (ratio >= p75) ) {
+      delta = pnorm/p5;
+      par = p5*par;
+    }
+  }
+
+  /*
+   *       test for successful iteration.
+   */
+  if (ratio >= p0001) {
+    
+    /*
+     *     successful iteration. update x, fvec, and their norms.
+     */
+    for (j=0; j<nfree; j++ ) {
+      x[j] = wa2[j];
+      wa2[j] = diag[ifree[j]]*x[j];
+    }
+    for (i=0; i<m; i++ ) {
+      fvec[i] = wa4[i];
+    }
+    xnorm = mp_enorm(nfree,wa2);
+    fnorm = fnorm1;
+    iter += 1;
+  }
+  
+  /*
+   *       tests for convergence.
+   */
+  if ((fabs(actred) <= conf.ftol) && (prered <= conf.ftol) && 
+      (p5*ratio <= one) ) {
+    info = MP_OK_CHI;
+  }
+  if (delta <= conf.xtol*xnorm) {
+    info = MP_OK_PAR;
+  }
+  if ((fabs(actred) <= conf.ftol) && (prered <= conf.ftol) && (p5*ratio <= one)
+      && ( info == 2) ) {
+    info = MP_OK_BOTH;
+  }
+  if (info != 0) {
+    goto L300;
+  }
+  
+  /*
+   *       tests for termination and stringent tolerances.
+   */
+  if ((conf.maxfev > 0) && (nfev >= conf.maxfev)) {
+    /* Too many function evaluations */
+    info = MP_MAXITER;
+  }
+  if (iter >= conf.maxiter) {
+    /* Too many iterations */
+    info = MP_MAXITER;
+  }
+  if ((fabs(actred) <= MP_MACHEP0) && (prered <= MP_MACHEP0) && (p5*ratio <= 
one) ) {
+    info = MP_FTOL;
+  }
+  if (delta <= MP_MACHEP0*xnorm) {
+    info = MP_XTOL;
+  }
+  if (gnorm <= MP_MACHEP0) {
+    info = MP_GTOL;
+  }
+  if (info != 0) {
+    goto L300;
+  }
+  
+  /*
+   *       end of the inner loop. repeat if iteration unsuccessful.
+   */
+  if (ratio < p0001) goto L200;
+  /*
+   *    end of the outer loop.
+   */
+  goto OUTER_LOOP;
+
+ L300:
+  /*
+   *     termination, either normal or user imposed.
+   */
+  if (iflag < 0) {
+    info = iflag;
+  }
+  iflag = 0;
+
+  for (i=0; i<nfree; i++) {
+    xall[ifree[i]] = x[i];
+  }
+  
+  if ((conf.nprint > 0) && (info > 0)) {
+    iflag = mp_call(funct, m, npar, xall, fvec, 0, private_data);
+    nfev += 1;
+  }
+
+  /* Compute number of pegged parameters */
+  npegged = 0;
+  if (pars) for (i=0; i<npar; i++) {
+    if ((pars[i].limited[0] && (pars[i].limits[0] == xall[i])) ||
+       (pars[i].limited[1] && (pars[i].limits[1] == xall[i]))) {
+      npegged ++;
+    }
+  }
+
+  /* Compute and return the covariance matrix and/or parameter errors */
+  if (result && (result->covar || result->xerror)) {
+    mp_covar(nfree, fjac, ldfjac, ipvt, conf.covtol, wa2);
+    
+    if (result->covar) {
+      /* Zero the destination covariance array */
+      for (j=0; j<(npar*npar); j++) result->covar[j] = 0;
+      
+      /* Transfer the covariance array */
+      for (j=0; j<nfree; j++) {
+       for (i=0; i<nfree; i++) {
+         result->covar[ifree[j]*npar+ifree[i]] = fjac[j*ldfjac+i];
+       }
+      }
+    }
+
+    if (result->xerror) {
+      for (j=0; j<npar; j++) result->xerror[j] = 0;
+
+      for (j=0; j<nfree; j++) {
+       double cc = fjac[j*ldfjac+j];
+       if (cc > 0) result->xerror[ifree[j]] = sqrt(cc);
+      }
+    }
+  }      
+
+  if (result) {
+    strcpy(result->version, MPFIT_VERSION);
+    result->bestnorm = mp_dmax1(fnorm,fnorm1);
+    result->bestnorm *= result->bestnorm;
+    result->orignorm = orignorm;
+    result->status   = info;
+    result->niter    = iter;
+    result->nfev     = nfev;
+    result->npar     = npar;
+    result->nfree    = nfree;
+    result->npegged  = npegged;
+    result->nfunc    = m;
+    
+    /* Copy residuals if requested */
+    if (result->resid) {
+      for (j=0; j<m; j++) result->resid[j] = fvec[j];
+    }
+  }
+
+
+ CLEANUP:
+  if (fvec) free(fvec);
+  if (qtf)  free(qtf);
+  if (x)    free(x);
+  if (xnew) free(xnew);
+  if (fjac) free(fjac);
+  if (diag) free(diag);
+  if (wa1)  free(wa1);
+  if (wa2)  free(wa2);
+  if (wa3)  free(wa3);
+  if (wa4)  free(wa4);
+  if (ipvt) free(ipvt);
+  if (pfixed) free(pfixed);
+  if (step) free(step);
+  if (dstep) free(dstep);
+  if (mpside) free(mpside);
+  if (ddebug) free(ddebug);
+  if (ddrtol) free(ddrtol);
+  if (ddatol) free(ddatol);
+  if (ifree) free(ifree);
+  if (qllim) free(qllim);
+  if (qulim) free(qulim);
+  if (llim)  free(llim);
+  if (ulim)  free(ulim);
+  if (dvecptr) free(dvecptr);
+
+  return info;
+}
+
+
+/************************fdjac2.c*************************/
+
+static 
+int mp_fdjac2(mp_func funct,
+             int m, int n, int *ifree, int npar, double *x, double *fvec,
+             double *fjac, int ldfjac, double epsfcn,
+             double *wa, void *priv, int *nfev,
+             double *step, double *dstep, int *dside,
+             int *qulimited, double *ulimit,
+             int *ddebug, double *ddrtol, double *ddatol,
+             double *wa2, double **dvec)
+{
+/*
+*     **********
+*
+*     subroutine fdjac2
+*
+*     this subroutine computes a forward-difference approximation
+*     to the m by n jacobian matrix associated with a specified
+*     problem of m functions in n variables.
+*
+*     the subroutine statement is
+*
+*      subroutine fdjac2(fcn,m,n,x,fvec,fjac,ldfjac,iflag,epsfcn,wa)
+*
+*     where
+*
+*      fcn is the name of the user-supplied subroutine which
+*        calculates the functions. fcn must be declared
+*        in an external statement in the user calling
+*        program, and should be written as follows.
+*
+*        subroutine fcn(m,n,x,fvec,iflag)
+*        integer m,n,iflag
+*        double precision x(n),fvec(m)
+*        ----------
+*        calculate the functions at x and
+*        return this vector in fvec.
+*        ----------
+*        return
+*        end
+*
+*        the value of iflag should not be changed by fcn unless
+*        the user wants to terminate execution of fdjac2.
+*        in this case set iflag to a negative integer.
+*
+*      m is a positive integer input variable set to the number
+*        of functions.
+*
+*      n is a positive integer input variable set to the number
+*        of variables. n must not exceed m.
+*
+*      x is an input array of length n.
+*
+*      fvec is an input array of length m which must contain the
+*        functions evaluated at x.
+*
+*      fjac is an output m by n array which contains the
+*        approximation to the jacobian matrix evaluated at x.
+*
+*      ldfjac is a positive integer input variable not less than m
+*        which specifies the leading dimension of the array fjac.
+*
+*      iflag is an integer variable which can be used to terminate
+*        the execution of fdjac2. see description of fcn.
+*
+*      epsfcn is an input variable used in determining a suitable
+*        step length for the forward-difference approximation. this
+*        approximation assumes that the relative errors in the
+*        functions are of the order of epsfcn. if epsfcn is less
+*        than the machine precision, it is assumed that the relative
+*        errors in the functions are of the order of the machine
+*        precision.
+*
+*      wa is a work array of length m.
+*
+*     subprograms called
+*
+*      user-supplied ...... fcn
+*
+*      minpack-supplied ... dpmpar
+*
+*      fortran-supplied ... dabs,dmax1,dsqrt
+*
+*     argonne national laboratory. minpack project. march 1980.
+*     burton s. garbow, kenneth e. hillstrom, jorge j. more
+*
+      **********
+*/
+  int i,j,ij;
+  int iflag = 0;
+  double eps,h,temp;
+  static double zero = 0.0;
+  int has_analytical_deriv = 0, has_numerical_deriv = 0;
+  int has_debug_deriv = 0;
+  
+  temp = mp_dmax1(epsfcn,MP_MACHEP0);
+  eps = sqrt(temp);
+  ij = 0;
+  ldfjac = 0;   /* Prevent compiler warning */
+  if (ldfjac){} /* Prevent compiler warning */
+
+  for (j=0; j<npar; j++) dvec[j] = 0;
+
+  /* Initialize the Jacobian derivative matrix */
+  for (j=0; j<(n*m); j++) fjac[j] = 0;
+
+  /* Check for which parameters need analytical derivatives and which
+     need numerical ones */
+  for (j=0; j<n; j++) {  /* Loop through free parameters only */
+    if (dside && dside[ifree[j]] == 3 && ddebug[ifree[j]] == 0) {
+      /* Purely analytical derivatives */
+      dvec[ifree[j]] = fjac + j*m;
+      has_analytical_deriv = 1;
+    } else if (dside && ddebug[ifree[j]] == 1) {
+      /* Numerical and analytical derivatives as a debug cross-check */
+      dvec[ifree[j]] = fjac + j*m;
+      has_analytical_deriv = 1;
+      has_numerical_deriv = 1;
+      has_debug_deriv = 1;
+    } else {
+      has_numerical_deriv = 1;
+    }
+  }
+
+  /* If there are any parameters requiring analytical derivatives,
+     then compute them first. */
+  if (has_analytical_deriv) {
+    iflag = mp_call(funct, m, npar, x, wa, dvec, priv);
+    if (nfev) *nfev = *nfev + 1;
+    if (iflag < 0 ) goto DONE;
+  }
+
+  if (has_debug_deriv) {
+    printf("FJAC DEBUG BEGIN\n");
+    printf("#  %10s %10s %10s %10s %10s %10s\n", 
+          "IPNT", "FUNC", "DERIV_U", "DERIV_N", "DIFF_ABS", "DIFF_REL");
+  }
+
+  /* Any parameters requiring numerical derivatives */
+  if (has_numerical_deriv) for (j=0; j<n; j++) {  /* Loop thru free parms */
+    int dsidei = (dside)?(dside[ifree[j]]):(0);
+    int debug  = ddebug[ifree[j]];
+    double dr = ddrtol[ifree[j]], da = ddatol[ifree[j]];
+    
+    /* Check for debugging */
+    if (debug) {
+      printf("FJAC PARM %d\n", ifree[j]);
+    }
+
+    /* Skip parameters already done by user-computed partials */
+    if (dside && dsidei == 3) continue;
+
+    temp = x[ifree[j]];
+    h = eps * fabs(temp);
+    if (step  &&  step[ifree[j]] > 0) h = step[ifree[j]];
+    if (dstep && dstep[ifree[j]] > 0) h = fabs(dstep[ifree[j]]*temp);
+    if (h == zero)                    h = eps;
+
+    /* If negative step requested, or we are against the upper limit */
+    if ((dside && dsidei == -1) || 
+       (dside && dsidei == 0 && 
+        qulimited && ulimit && qulimited[j] && 
+        (temp > (ulimit[j]-h)))) {
+      h = -h;
+    }
+
+    x[ifree[j]] = temp + h;
+    iflag = mp_call(funct, m, npar, x, wa, 0, priv);
+    if (nfev) *nfev = *nfev + 1;
+    if (iflag < 0 ) goto DONE;
+    x[ifree[j]] = temp;
+
+    if (dsidei <= 1) {
+      /* COMPUTE THE ONE-SIDED DERIVATIVE */
+      if (! debug) {
+       /* Non-debug path for speed */
+       for (i=0; i<m; i++, ij++) {
+         fjac[ij] = (wa[i] - fvec[i])/h; /* fjac[i+m*j] */
+       }
+      } else {
+       /* Debug path for correctness */
+       for (i=0; i<m; i++, ij++) {
+         double fjold = fjac[ij];
+         fjac[ij] = (wa[i] - fvec[i])/h; /* fjac[i+m*j] */
+         if ((da == 0 && dr == 0 && (fjold != 0 || fjac[ij] != 0)) ||
+             ((da != 0 || dr != 0) && (fabs(fjold-fjac[ij]) > da + 
fabs(fjold)*dr))) {
+           printf("   %10d %10.4g %10.4g %10.4g %10.4g %10.4g\n", 
+                  i, fvec[i], fjold, fjac[ij], fjold-fjac[ij], 
+                  (fjold == 0)?(0):((fjold-fjac[ij])/fjold));
+         }
+       }
+      } /* end debugging */
+
+    } else {  /* dside > 2 */
+      /* COMPUTE THE TWO-SIDED DERIVATIVE */
+      for (i=0; i<m; i++) {
+       wa2[i] = wa[i];
+      }
+
+      /* Evaluate at x - h */
+      x[ifree[j]] = temp - h;
+      iflag = mp_call(funct, m, npar, x, wa, 0, priv);
+      if (nfev) *nfev = *nfev + 1;
+      if (iflag < 0 ) goto DONE;
+      x[ifree[j]] = temp;
+
+      /* Now compute derivative as (f(x+h) - f(x-h))/(2h) */
+      if (! debug ) {
+       /* Non-debug path for speed */
+       for (i=0; i<m; i++, ij++) {
+         fjac[ij] = (fjac[ij] - wa[i])/(2*h); /* fjac[i+m*j] */
+       }
+      } else {
+       /* Debug path for correctness */
+       for (i=0; i<m; i++, ij++) {
+         double fjold = fjac[ij];
+         fjac[ij] = (wa2[i] - wa[i])/(2*h); /* fjac[i+m*j] */
+         if ((da == 0 && dr == 0 && (fjold != 0 || fjac[ij] != 0)) ||
+             ((da != 0 || dr != 0) && (fabs(fjold-fjac[ij]) > da + 
fabs(fjold)*dr))) {
+           printf("   %10d %10.4g %10.4g %10.4g %10.4g %10.4g\n", 
+                  i, fvec[i], fjold, fjac[ij], fjold-fjac[ij], 
+                  (fjold == 0)?(0):((fjold-fjac[ij])/fjold));
+         }
+       }
+      } /* end debugging */
+      
+    } /* if (dside > 2) */
+  } /* if (has_numerical_derivative) */
+
+  if (has_debug_deriv) {
+    printf("FJAC DEBUG END\n");
+  }
+
+ DONE:
+  if (iflag < 0) return iflag;
+  return 0; 
+  /*
+   *     last card of subroutine fdjac2.
+   */
+}
+
+
+/************************qrfac.c*************************/
+ 
+static 
+void mp_qrfac(int m, int n, double *a, int lda, 
+             int pivot, int *ipvt, int lipvt,
+             double *rdiag, double *acnorm, double *wa)
+{
+/*
+*     **********
+*
+*     subroutine qrfac
+*
+*     this subroutine uses householder transformations with column
+*     pivoting (optional) to compute a qr factorization of the
+*     m by n matrix a. that is, qrfac determines an orthogonal
+*     matrix q, a permutation matrix p, and an upper trapezoidal
+*     matrix r with diagonal elements of nonincreasing magnitude,
+*     such that a*p = q*r. the householder transformation for
+*     column k, k = 1,2,...,min(m,n), is of the form
+*
+*                          t
+*          i - (1/u(k))*u*u
+*
+*     where u has zeros in the first k-1 positions. the form of
+*     this transformation and the method of pivoting first
+*     appeared in the corresponding linpack subroutine.
+*
+*     the subroutine statement is
+*
+*      subroutine qrfac(m,n,a,lda,pivot,ipvt,lipvt,rdiag,acnorm,wa)
+*
+*     where
+*
+*      m is a positive integer input variable set to the number
+*        of rows of a.
+*
+*      n is a positive integer input variable set to the number
+*        of columns of a.
+*
+*      a is an m by n array. on input a contains the matrix for
+*        which the qr factorization is to be computed. on output
+*        the strict upper trapezoidal part of a contains the strict
+*        upper trapezoidal part of r, and the lower trapezoidal
+*        part of a contains a factored form of q (the non-trivial
+*        elements of the u vectors described above).
+*
+*      lda is a positive integer input variable not less than m
+*        which specifies the leading dimension of the array a.
+*
+*      pivot is a logical input variable. if pivot is set true,
+*        then column pivoting is enforced. if pivot is set false,
+*        then no column pivoting is done.
+*
+*      ipvt is an integer output array of length lipvt. ipvt
+*        defines the permutation matrix p such that a*p = q*r.
+*        column j of p is column ipvt(j) of the identity matrix.
+*        if pivot is false, ipvt is not referenced.
+*
+*      lipvt is a positive integer input variable. if pivot is false,
+*        then lipvt may be as small as 1. if pivot is true, then
+*        lipvt must be at least n.
+*
+*      rdiag is an output array of length n which contains the
+*        diagonal elements of r.
+*
+*      acnorm is an output array of length n which contains the
+*        norms of the corresponding columns of the input matrix a.
+*        if this information is not needed, then acnorm can coincide
+*        with rdiag.
+*
+*      wa is a work array of length n. if pivot is false, then wa
+*        can coincide with rdiag.
+*
+*     subprograms called
+*
+*      minpack-supplied ... dpmpar,enorm
+*
+*      fortran-supplied ... dmax1,dsqrt,min0
+*
+*     argonne national laboratory. minpack project. march 1980.
+*     burton s. garbow, kenneth e. hillstrom, jorge j. more
+*
+*     **********
+*/
+  int i,ij,jj,j,jp1,k,kmax,minmn;
+  double ajnorm,sum,temp;
+  static double zero = 0.0;
+  static double one = 1.0;
+  static double p05 = 0.05;
+
+  lda = 0;      /* Prevent compiler warning */
+  lipvt = 0;    /* Prevent compiler warning */
+  if (lda) {}   /* Prevent compiler warning */
+  if (lipvt) {} /* Prevent compiler warning */
+
+  /*
+   *     compute the initial column norms and initialize several arrays.
+   */
+  ij = 0;
+  for (j=0; j<n; j++) {
+    acnorm[j] = mp_enorm(m,&a[ij]);
+    rdiag[j] = acnorm[j];
+    wa[j] = rdiag[j];
+    if (pivot != 0)
+      ipvt[j] = j;
+    ij += m; /* m*j */
+  }
+  /*
+   *     reduce a to r with householder transformations.
+   */
+  minmn = mp_min0(m,n);
+  for (j=0; j<minmn; j++) {
+    if (pivot == 0)
+      goto L40;
+    /*
+     *  bring the column of largest norm into the pivot position.
+     */
+    kmax = j;
+    for (k=j; k<n; k++)
+      {
+       if (rdiag[k] > rdiag[kmax])
+         kmax = k;
+      }
+    if (kmax == j)
+      goto L40;
+      
+    ij = m * j;
+    jj = m * kmax;
+    for (i=0; i<m; i++)
+      {
+       temp = a[ij]; /* [i+m*j] */
+       a[ij] = a[jj]; /* [i+m*kmax] */
+       a[jj] = temp;
+       ij += 1;
+       jj += 1;
+      }
+    rdiag[kmax] = rdiag[j];
+    wa[kmax] = wa[j];
+    k = ipvt[j];
+    ipvt[j] = ipvt[kmax];
+    ipvt[kmax] = k;
+      
+  L40:
+    /*
+     *  compute the householder transformation to reduce the
+     *  j-th column of a to a multiple of the j-th unit vector.
+     */
+    jj = j + m*j;
+    ajnorm = mp_enorm(m-j,&a[jj]);
+    if (ajnorm == zero)
+      goto L100;
+    if (a[jj] < zero)
+      ajnorm = -ajnorm;
+    ij = jj;
+    for (i=j; i<m; i++)
+      {
+       a[ij] /= ajnorm;
+       ij += 1; /* [i+m*j] */
+      }
+    a[jj] += one;
+    /*
+     *  apply the transformation to the remaining columns
+     *  and update the norms.
+     */
+    jp1 = j + 1;
+    if (jp1 < n)
+      {
+       for (k=jp1; k<n; k++)
+         {
+           sum = zero;
+           ij = j + m*k;
+           jj = j + m*j;
+           for (i=j; i<m; i++)
+             {
+               sum += a[jj]*a[ij];
+               ij += 1; /* [i+m*k] */
+               jj += 1; /* [i+m*j] */
+             }
+           temp = sum/a[j+m*j];
+           ij = j + m*k;
+           jj = j + m*j;
+           for (i=j; i<m; i++)
+             {
+               a[ij] -= temp*a[jj];
+               ij += 1; /* [i+m*k] */
+               jj += 1; /* [i+m*j] */
+             }
+           if ((pivot != 0) && (rdiag[k] != zero))
+             {
+               temp = a[j+m*k]/rdiag[k];
+               temp = mp_dmax1( zero, one-temp*temp );
+               rdiag[k] *= sqrt(temp);
+               temp = rdiag[k]/wa[k];
+               if ((p05*temp*temp) <= MP_MACHEP0)
+                 {
+                   rdiag[k] = mp_enorm(m-j-1,&a[jp1+m*k]);
+                   wa[k] = rdiag[k];
+                 }
+             }
+         }
+      }
+      
+  L100:
+    rdiag[j] = -ajnorm;
+  }
+  /*
+   *     last card of subroutine qrfac.
+   */
+}
+
+/************************qrsolv.c*************************/
+
+static 
+void mp_qrsolv(int n, double *r, int ldr, int *ipvt, double *diag,
+              double *qtb, double *x, double *sdiag, double *wa)
+{
+/*
+*     **********
+*
+*     subroutine qrsolv
+*
+*     given an m by n matrix a, an n by n diagonal matrix d,
+*     and an m-vector b, the problem is to determine an x which
+*     solves the system
+*
+*          a*x = b ,     d*x = 0 ,
+*
+*     in the least squares sense.
+*
+*     this subroutine completes the solution of the problem
+*     if it is provided with the necessary information from the
+*     qr factorization, with column pivoting, of a. that is, if
+*     a*p = q*r, where p is a permutation matrix, q has orthogonal
+*     columns, and r is an upper triangular matrix with diagonal
+*     elements of nonincreasing magnitude, then qrsolv expects
+*     the full upper triangle of r, the permutation matrix p,
+*     and the first n components of (q transpose)*b. the system
+*     a*x = b, d*x = 0, is then equivalent to
+*
+*                 t       t
+*          r*z = q *b ,  p *d*p*z = 0 ,
+*
+*     where x = p*z. if this system does not have full rank,
+*     then a least squares solution is obtained. on output qrsolv
+*     also provides an upper triangular matrix s such that
+*
+*           t   t               t
+*          p *(a *a + d*d)*p = s *s .
+*
+*     s is computed within qrsolv and may be of separate interest.
+*
+*     the subroutine statement is
+*
+*      subroutine qrsolv(n,r,ldr,ipvt,diag,qtb,x,sdiag,wa)
+*
+*     where
+*
+*      n is a positive integer input variable set to the order of r.
+*
+*      r is an n by n array. on input the full upper triangle
+*        must contain the full upper triangle of the matrix r.
+*        on output the full upper triangle is unaltered, and the
+*        strict lower triangle contains the strict upper triangle
+*        (transposed) of the upper triangular matrix s.
+*
+*      ldr is a positive integer input variable not less than n
+*        which specifies the leading dimension of the array r.
+*
+*      ipvt is an integer input array of length n which defines the
+*        permutation matrix p such that a*p = q*r. column j of p
+*        is column ipvt(j) of the identity matrix.
+*
+*      diag is an input array of length n which must contain the
+*        diagonal elements of the matrix d.
+*
+*      qtb is an input array of length n which must contain the first
+*        n elements of the vector (q transpose)*b.
+*
+*      x is an output array of length n which contains the least
+*        squares solution of the system a*x = b, d*x = 0.
+*
+*      sdiag is an output array of length n which contains the
+*        diagonal elements of the upper triangular matrix s.
+*
+*      wa is a work array of length n.
+*
+*     subprograms called
+*
+*      fortran-supplied ... dabs,dsqrt
+*
+*     argonne national laboratory. minpack project. march 1980.
+*     burton s. garbow, kenneth e. hillstrom, jorge j. more
+*
+*     **********
+*/
+  int i,ij,ik,kk,j,jp1,k,kp1,l,nsing;
+  double cosx,cotan,qtbpj,sinx,sum,tanx,temp;
+  static double zero = 0.0;
+  static double p25 = 0.25;
+  static double p5 = 0.5;
+  
+  /*
+   *     copy r and (q transpose)*b to preserve input and initialize s.
+   *     in particular, save the diagonal elements of r in x.
+   */
+  kk = 0;
+  for (j=0; j<n; j++) {
+    ij = kk;
+    ik = kk;
+    for (i=j; i<n; i++)
+      {
+       r[ij] = r[ik];
+       ij += 1;   /* [i+ldr*j] */
+       ik += ldr; /* [j+ldr*i] */
+      }
+    x[j] = r[kk];
+    wa[j] = qtb[j];
+    kk += ldr+1; /* j+ldr*j */
+  }
+
+  /*
+   *     eliminate the diagonal matrix d using a givens rotation.
+   */
+  for (j=0; j<n; j++) {
+    /*
+     *  prepare the row of d to be eliminated, locating the
+     *  diagonal element using p from the qr factorization.
+     */
+    l = ipvt[j];
+    if (diag[l] == zero)
+      goto L90;
+    for (k=j; k<n; k++)
+      sdiag[k] = zero;
+    sdiag[j] = diag[l];
+    /*
+     *  the transformations to eliminate the row of d
+     *  modify only a single element of (q transpose)*b
+     *  beyond the first n, which is initially zero.
+     */
+    qtbpj = zero;
+    for (k=j; k<n; k++)
+      {
+       /*
+        *          determine a givens rotation which eliminates the
+        *          appropriate element in the current row of d.
+        */
+       if (sdiag[k] == zero)
+         continue;
+       kk = k + ldr * k;
+       if (fabs(r[kk]) < fabs(sdiag[k]))
+         {
+           cotan = r[kk]/sdiag[k];
+           sinx = p5/sqrt(p25+p25*cotan*cotan);
+           cosx = sinx*cotan;
+         }
+       else
+         {
+           tanx = sdiag[k]/r[kk];
+           cosx = p5/sqrt(p25+p25*tanx*tanx);
+           sinx = cosx*tanx;
+         }
+       /*
+        *          compute the modified diagonal element of r and
+        *          the modified element of ((q transpose)*b,0).
+        */
+       r[kk] = cosx*r[kk] + sinx*sdiag[k];
+       temp = cosx*wa[k] + sinx*qtbpj;
+       qtbpj = -sinx*wa[k] + cosx*qtbpj;
+       wa[k] = temp;
+       /*
+        *          accumulate the tranformation in the row of s.
+        */
+       kp1 = k + 1;
+       if (n > kp1)
+         {
+           ik = kk + 1;
+           for (i=kp1; i<n; i++)
+             {
+               temp = cosx*r[ik] + sinx*sdiag[i];
+               sdiag[i] = -sinx*r[ik] + cosx*sdiag[i];
+               r[ik] = temp;
+               ik += 1; /* [i+ldr*k] */
+             }
+         }
+      }
+  L90:
+    /*
+     *  store the diagonal element of s and restore
+     *  the corresponding diagonal element of r.
+     */
+    kk = j + ldr*j;
+    sdiag[j] = r[kk];
+    r[kk] = x[j];
+  }
+  /*
+   *     solve the triangular system for z. if the system is
+   *     singular, then obtain a least squares solution.
+   */
+  nsing = n;
+  for (j=0; j<n; j++) {
+    if ((sdiag[j] == zero) && (nsing == n))
+      nsing = j;
+    if (nsing < n)
+      wa[j] = zero;
+  }
+  if (nsing < 1)
+    goto L150;
+  
+  for (k=0; k<nsing; k++) {
+    j = nsing - k - 1;
+    sum = zero;
+    jp1 = j + 1;
+    if (nsing > jp1)
+      {
+       ij = jp1 + ldr * j;
+       for (i=jp1; i<nsing; i++)
+         {
+           sum += r[ij]*wa[i];
+           ij += 1; /* [i+ldr*j] */
+         }
+      }
+    wa[j] = (wa[j] - sum)/sdiag[j];
+  }
+ L150:
+  /*
+   *     permute the components of z back to components of x.
+   */
+  for (j=0; j<n; j++) {
+    l = ipvt[j];
+    x[l] = wa[j];
+  }
+  /*
+   *     last card of subroutine qrsolv.
+   */
+}
+
+/************************lmpar.c*************************/
+
+static 
+void mp_lmpar(int n, double *r, int ldr, int *ipvt, int *ifree, double *diag,
+             double *qtb, double delta, double *par, double *x,
+             double *sdiag, double *wa1, double *wa2) 
+{
+  /*     **********
+   *
+   *     subroutine lmpar
+   *
+   *     given an m by n matrix a, an n by n nonsingular diagonal
+   *     matrix d, an m-vector b, and a positive number delta,
+   *     the problem is to determine a value for the parameter
+   *     par such that if x solves the system
+   *
+   *       a*x = b ,     sqrt(par)*d*x = 0 ,
+   *
+   *     in the least squares sense, and dxnorm is the euclidean
+   *     norm of d*x, then either par is zero and
+   *
+   *       (dxnorm-delta) .le. 0.1*delta ,
+   *
+   *     or par is positive and
+   *
+   *       abs(dxnorm-delta) .le. 0.1*delta .
+   *
+   *     this subroutine completes the solution of the problem
+   *     if it is provided with the necessary information from the
+   *     qr factorization, with column pivoting, of a. that is, if
+   *     a*p = q*r, where p is a permutation matrix, q has orthogonal
+   *     columns, and r is an upper triangular matrix with diagonal
+   *     elements of nonincreasing magnitude, then lmpar expects
+   *     the full upper triangle of r, the permutation matrix p,
+   *     and the first n components of (q transpose)*b. on output
+   *     lmpar also provides an upper triangular matrix s such that
+   *
+   *        t   t                   t
+   *       p *(a *a + par*d*d)*p = s *s .
+   *
+   *     s is employed within lmpar and may be of separate interest.
+   *
+   *     only a few iterations are generally needed for convergence
+   *     of the algorithm. if, however, the limit of 10 iterations
+   *     is reached, then the output par will contain the best
+   *     value obtained so far.
+   *
+   *     the subroutine statement is
+   *
+   *   subroutine lmpar(n,r,ldr,ipvt,diag,qtb,delta,par,x,sdiag,
+   *                    wa1,wa2)
+   *
+   *     where
+   *
+   *   n is a positive integer input variable set to the order of r.
+   *
+   *   r is an n by n array. on input the full upper triangle
+   *     must contain the full upper triangle of the matrix r.
+   *     on output the full upper triangle is unaltered, and the
+   *     strict lower triangle contains the strict upper triangle
+   *     (transposed) of the upper triangular matrix s.
+   *
+   *   ldr is a positive integer input variable not less than n
+   *     which specifies the leading dimension of the array r.
+   *
+   *   ipvt is an integer input array of length n which defines the
+   *     permutation matrix p such that a*p = q*r. column j of p
+   *     is column ipvt(j) of the identity matrix.
+   *
+   *   diag is an input array of length n which must contain the
+   *     diagonal elements of the matrix d.
+   *
+   *   qtb is an input array of length n which must contain the first
+   *     n elements of the vector (q transpose)*b.
+   *
+   *   delta is a positive input variable which specifies an upper
+   *     bound on the euclidean norm of d*x.
+   *
+   *   par is a nonnegative variable. on input par contains an
+   *     initial estimate of the levenberg-marquardt parameter.
+   *     on output par contains the final estimate.
+   *
+   *   x is an output array of length n which contains the least
+   *     squares solution of the system a*x = b, sqrt(par)*d*x = 0,
+   *     for the output par.
+   *
+   *   sdiag is an output array of length n which contains the
+   *     diagonal elements of the upper triangular matrix s.
+   *
+   *   wa1 and wa2 are work arrays of length n.
+   *
+   *     subprograms called
+   *
+   *   minpack-supplied ... dpmpar,mp_enorm,qrsolv
+   *
+   *   fortran-supplied ... dabs,mp_dmax1,dmin1,dsqrt
+   *
+   *     argonne national laboratory. minpack project. march 1980.
+   *     burton s. garbow, kenneth e. hillstrom, jorge j. more
+   *
+   *     **********
+   */
+  int i,iter,ij,jj,j,jm1,jp1,k,l,nsing;
+  double dxnorm,fp,gnorm,parc,parl,paru;
+  double sum,temp;
+  static double zero = 0.0;
+  /* static double one = 1.0; */
+  static double p1 = 0.1;
+  static double p001 = 0.001;
+  
+  /*
+   *     compute and store in x the gauss-newton direction. if the
+   *     jacobian is rank-deficient, obtain a least squares solution.
+   */
+  nsing = n;
+  jj = 0;
+  for (j=0; j<n; j++) {
+    wa1[j] = qtb[j];
+    if ((r[jj] == zero) && (nsing == n))
+      nsing = j;
+    if (nsing < n)
+      wa1[j] = zero;
+    jj += ldr+1; /* [j+ldr*j] */
+  }
+
+  if (nsing >= 1) {
+    for (k=0; k<nsing; k++)
+      {
+       j = nsing - k - 1;
+       wa1[j] = wa1[j]/r[j+ldr*j];
+       temp = wa1[j];
+       jm1 = j - 1;
+       if (jm1 >= 0)
+         {
+           ij = ldr * j;
+           for (i=0; i<=jm1; i++)
+             {
+               wa1[i] -= r[ij]*temp;
+               ij += 1;
+             }
+         }
+      }
+  }
+  
+  for (j=0; j<n; j++) {
+    l = ipvt[j];
+    x[l] = wa1[j];
+  }
+  /*
+   *     initialize the iteration counter.
+   *     evaluate the function at the origin, and test
+   *     for acceptance of the gauss-newton direction.
+   */
+  iter = 0;
+  for (j=0; j<n; j++)
+    wa2[j] = diag[ifree[j]]*x[j];
+  dxnorm = mp_enorm(n,wa2);
+  fp = dxnorm - delta;
+  if (fp <= p1*delta) {
+    goto L220;
+  }
+  /*
+   *     if the jacobian is not rank deficient, the newton
+   *     step provides a lower bound, parl, for the zero of
+   *     the function. otherwise set this bound to zero.
+   */
+  parl = zero;
+  if (nsing >= n) {
+    for (j=0; j<n; j++)
+      {
+       l = ipvt[j];
+       wa1[j] = diag[ifree[l]]*(wa2[l]/dxnorm);
+      }
+    jj = 0;
+    for (j=0; j<n; j++)
+      {
+       sum = zero;
+       jm1 = j - 1;
+       if (jm1 >= 0)
+         {
+           ij = jj;
+           for (i=0; i<=jm1; i++)
+             {
+               sum += r[ij]*wa1[i];
+               ij += 1;
+             }
+         }
+       wa1[j] = (wa1[j] - sum)/r[j+ldr*j];
+       jj += ldr; /* [i+ldr*j] */
+      }
+    temp = mp_enorm(n,wa1);
+    parl = ((fp/delta)/temp)/temp;
+  }
+  /*
+   *     calculate an upper bound, paru, for the zero of the function.
+   */
+  jj = 0;
+  for (j=0; j<n; j++) {
+    sum = zero;
+    ij = jj;
+    for (i=0; i<=j; i++)
+      {
+       sum += r[ij]*qtb[i];
+       ij += 1;
+      }
+    l = ipvt[j];
+    wa1[j] = sum/diag[ifree[l]];
+    jj += ldr; /* [i+ldr*j] */
+  }
+  gnorm = mp_enorm(n,wa1);
+  paru = gnorm/delta;
+  if (paru == zero)
+    paru = MP_DWARF/mp_dmin1(delta,p1);
+  /*
+   *     if the input par lies outside of the interval (parl,paru),
+   *     set par to the closer endpoint.
+   */
+  *par = mp_dmax1( *par,parl);
+  *par = mp_dmin1( *par,paru);
+  if (*par == zero)
+    *par = gnorm/dxnorm;
+
+  /*
+   *     beginning of an iteration.
+   */
+ L150:
+  iter += 1;
+  /*
+   *    evaluate the function at the current value of par.
+   */
+  if (*par == zero)
+    *par = mp_dmax1(MP_DWARF,p001*paru);
+  temp = sqrt( *par );
+  for (j=0; j<n; j++)
+    wa1[j] = temp*diag[ifree[j]];
+  mp_qrsolv(n,r,ldr,ipvt,wa1,qtb,x,sdiag,wa2);
+  for (j=0; j<n; j++)
+    wa2[j] = diag[ifree[j]]*x[j];
+  dxnorm = mp_enorm(n,wa2);
+  temp = fp;
+  fp = dxnorm - delta;
+  /*
+   *    if the function is small enough, accept the current value
+   *    of par. also test for the exceptional cases where parl
+   *    is zero or the number of iterations has reached 10.
+   */
+  if ((fabs(fp) <= p1*delta)
+      || ((parl == zero) && (fp <= temp) && (temp < zero))
+      || (iter == 10))
+    goto L220;
+  /*
+   *    compute the newton correction.
+   */
+  for (j=0; j<n; j++) {
+    l = ipvt[j];
+    wa1[j] = diag[ifree[l]]*(wa2[l]/dxnorm);
+  }
+  jj = 0;
+  for (j=0; j<n; j++) {
+    wa1[j] = wa1[j]/sdiag[j];
+    temp = wa1[j];
+    jp1 = j + 1;
+    if (jp1 < n)
+      {
+       ij = jp1 + jj;
+       for (i=jp1; i<n; i++)
+         {
+           wa1[i] -= r[ij]*temp;
+           ij += 1; /* [i+ldr*j] */
+         }
+      }
+    jj += ldr; /* ldr*j */
+  }
+  temp = mp_enorm(n,wa1);
+  parc = ((fp/delta)/temp)/temp;
+  /*
+   *    depending on the sign of the function, update parl or paru.
+   */
+  if (fp > zero)
+    parl = mp_dmax1(parl, *par);
+  if (fp < zero)
+    paru = mp_dmin1(paru, *par);
+  /*
+   *    compute an improved estimate for par.
+   */
+  *par = mp_dmax1(parl, *par + parc);
+  /*
+   *    end of an iteration.
+   */
+  goto L150;
+  
+ L220:
+  /*
+   *     termination.
+   */
+  if (iter == 0)
+    *par = zero;
+  /*
+   *     last card of subroutine lmpar.
+   */
+}
+
+
+/************************enorm.c*************************/
+ 
+static 
+double mp_enorm(int n, double *x) 
+{
+  /*
+   *     **********
+   *
+   *     function enorm
+   *
+   *     given an n-vector x, this function calculates the
+   *     euclidean norm of x.
+   *
+   *     the euclidean norm is computed by accumulating the sum of
+   *     squares in three different sums. the sums of squares for the
+   *     small and large components are scaled so that no overflows
+   *     occur. non-destructive underflows are permitted. underflows
+   *     and overflows do not occur in the computation of the unscaled
+   *     sum of squares for the intermediate components.
+   *     the definitions of small, intermediate and large components
+   *     depend on two constants, rdwarf and rgiant. the main
+   *     restrictions on these constants are that rdwarf**2 not
+   *     underflow and rgiant**2 not overflow. the constants
+   *     given here are suitable for every known computer.
+   *
+   *     the function statement is
+   *
+   *   double precision function enorm(n,x)
+   *
+   *     where
+   *
+   *   n is a positive integer input variable.
+   *
+   *   x is an input array of length n.
+   *
+   *     subprograms called
+   *
+   *   fortran-supplied ... dabs,dsqrt
+   *
+   *     argonne national laboratory. minpack project. march 1980.
+   *     burton s. garbow, kenneth e. hillstrom, jorge j. more
+   *
+   *     **********
+   */
+  int i;
+  double agiant,floatn,s1,s2,s3,xabs,x1max,x3max;
+  double ans, temp;
+  double rdwarf = MP_RDWARF;
+  double rgiant = MP_RGIANT;
+  static double zero = 0.0;
+  static double one = 1.0;
+  
+  s1 = zero;
+  s2 = zero;
+  s3 = zero;
+  x1max = zero;
+  x3max = zero;
+  floatn = n;
+  agiant = rgiant/floatn;
+  
+  for (i=0; i<n; i++) {
+    xabs = fabs(x[i]);
+    if ((xabs > rdwarf) && (xabs < agiant))
+      {
+       /*
+        *          sum for intermediate components.
+        */
+       s2 += xabs*xabs;
+       continue;
+      }
+      
+    if (xabs > rdwarf)
+      {
+       /*
+        *             sum for large components.
+        */
+       if (xabs > x1max)
+         {
+           temp = x1max/xabs;
+           s1 = one + s1*temp*temp;
+           x1max = xabs;
+         }
+       else
+         {
+           temp = xabs/x1max;
+           s1 += temp*temp;
+         }
+       continue;
+      }
+    /*
+     *        sum for small components.
+     */
+    if (xabs > x3max)
+      {
+       temp = x3max/xabs;
+       s3 = one + s3*temp*temp;
+       x3max = xabs;
+      }
+    else       
+      {
+       if (xabs != zero)
+         {
+           temp = xabs/x3max;
+           s3 += temp*temp;
+         }
+      }
+  }
+  /*
+   *     calculation of norm.
+   */
+  if (s1 != zero) {
+    temp = s1 + (s2/x1max)/x1max;
+    ans = x1max*sqrt(temp);
+    return(ans);
+  }
+  if (s2 != zero) {
+    if (s2 >= x3max)
+      temp = s2*(one+(x3max/s2)*(x3max*s3));
+    else
+      temp = x3max*((s2/x3max)+(x3max*s3));
+    ans = sqrt(temp);
+  }
+  else
+    {
+      ans = x3max*sqrt(s3);
+    }
+  return(ans);
+  /*
+   *     last card of function enorm.
+   */
+}
+
+/************************lmmisc.c*************************/
+
+static 
+double mp_dmax1(double a, double b) 
+{
+  if (a >= b)
+    return(a);
+  else
+    return(b);
+}
+
+static 
+double mp_dmin1(double a, double b)
+{
+  if (a <= b)
+    return(a);
+  else
+    return(b);
+}
+
+static 
+int mp_min0(int a, int b)
+{
+  if (a <= b)
+    return(a);
+  else
+    return(b);
+}
+
+/************************covar.c*************************/
+/*
+c     **********
+c
+c     subroutine covar
+c
+c     given an m by n matrix a, the problem is to determine
+c     the covariance matrix corresponding to a, defined as
+c
+c                    t
+c           inverse(a *a) .
+c
+c     this subroutine completes the solution of the problem
+c     if it is provided with the necessary information from the
+c     qr factorization, with column pivoting, of a. that is, if
+c     a*p = q*r, where p is a permutation matrix, q has orthogonal
+c     columns, and r is an upper triangular matrix with diagonal
+c     elements of nonincreasing magnitude, then covar expects
+c     the full upper triangle of r and the permutation matrix p.
+c     the covariance matrix is then computed as
+c
+c                      t     t
+c           p*inverse(r *r)*p  .
+c
+c     if a is nearly rank deficient, it may be desirable to compute
+c     the covariance matrix corresponding to the linearly independent
+c     columns of a. to define the numerical rank of a, covar uses
+c     the tolerance tol. if l is the largest integer such that
+c
+c           abs(r(l,l)) .gt. tol*abs(r(1,1)) ,
+c
+c     then covar computes the covariance matrix corresponding to
+c     the first l columns of r. for k greater than l, column
+c     and row ipvt(k) of the covariance matrix are set to zero.
+c
+c     the subroutine statement is
+c
+c       subroutine covar(n,r,ldr,ipvt,tol,wa)
+c
+c     where
+c
+c       n is a positive integer input variable set to the order of r.
+c
+c       r is an n by n array. on input the full upper triangle must
+c         contain the full upper triangle of the matrix r. on output
+c         r contains the square symmetric covariance matrix.
+c
+c       ldr is a positive integer input variable not less than n
+c         which specifies the leading dimension of the array r.
+c
+c       ipvt is an integer input array of length n which defines the
+c         permutation matrix p such that a*p = q*r. column j of p
+c         is column ipvt(j) of the identity matrix.
+c
+c       tol is a nonnegative input variable used to define the
+c         numerical rank of a in the manner described above.
+c
+c       wa is a work array of length n.
+c
+c     subprograms called
+c
+c       fortran-supplied ... dabs
+c
+c     argonne national laboratory. minpack project. august 1980.
+c     burton s. garbow, kenneth e. hillstrom, jorge j. more
+c
+c     **********
+*/
+
+static 
+int mp_covar(int n, double *r, int ldr, int *ipvt, double tol, double *wa)
+{
+  int i, ii, j, jj, k, l;
+  int kk, kj, ji, j0, k0, jj0;
+  int sing;
+  double one = 1.0, temp, tolr, zero = 0.0;
+
+  /*
+   * form the inverse of r in the full upper triangle of r.
+   */
+
+#if 0
+  for (j=0; j<n; j++) {
+    for (i=0; i<n; i++) {
+      printf("%f ", r[j*ldr+i]);
+    }
+    printf("\n");
+  }
+#endif
+
+  tolr = tol*fabs(r[0]);
+  l = -1;
+  for (k=0; k<n; k++) {
+    kk = k*ldr + k;
+    if (fabs(r[kk]) <= tolr) break;
+
+    r[kk] = one/r[kk];
+    for (j=0; j<k; j++) {
+      kj = k*ldr + j;
+      temp = r[kk] * r[kj];
+      r[kj] = zero;
+
+      k0 = k*ldr; j0 = j*ldr;
+      for (i=0; i<=j; i++) {
+       r[k0+i] += (-temp*r[j0+i]);
+      }
+    }
+    l = k;
+  }
+
+  /* 
+   * Form the full upper triangle of the inverse of (r transpose)*r
+   * in the full upper triangle of r
+   */
+
+  if (l >= 0) {
+    for (k=0; k <= l; k++) {
+      k0 = k*ldr; 
+
+      for (j=0; j<k; j++) {
+       temp = r[k*ldr+j];
+
+       j0 = j*ldr;
+       for (i=0; i<=j; i++) {
+         r[j0+i] += temp*r[k0+i];
+       }
+      }
+      
+      temp = r[k0+k];
+      for (i=0; i<=k; i++) {
+       r[k0+i] *= temp;
+      }
+    }
+  }
+
+  /*
+   * For the full lower triangle of the covariance matrix
+   * in the strict lower triangle or and in wa
+   */
+  for (j=0; j<n; j++) {
+    jj = ipvt[j];
+    sing = (j > l);
+    j0 = j*ldr;
+    jj0 = jj*ldr;
+    for (i=0; i<=j; i++) {
+      ji = j0+i;
+
+      if (sing) r[ji] = zero;
+      ii = ipvt[i];
+      if (ii > jj) r[jj0+ii] = r[ji];
+      if (ii < jj) r[ii*ldr+jj] = r[ji];
+    }
+    wa[jj] = r[j0+j];
+  }
+
+  /*
+   * Symmetrize the covariance matrix in r
+   */
+  for (j=0; j<n; j++) {
+    j0 = j*ldr;
+    for (i=0; i<j; i++) {
+      r[j0+i] = r[i*ldr+j];
+    }
+    r[j0+j] = wa[j];
+  }
+
+#if 0
+  for (j=0; j<n; j++) {
+    for (i=0; i<n; i++) {
+      printf("%f ", r[j*ldr+i]);
+    }
+    printf("\n");
+  }
+#endif
+
+  return 0;
+}

http://git-wip-us.apache.org/repos/asf/impala/blob/0936e329/be/src/thirdparty/mpfit/mpfit.h
----------------------------------------------------------------------
diff --git a/be/src/thirdparty/mpfit/mpfit.h b/be/src/thirdparty/mpfit/mpfit.h
new file mode 100644
index 0000000..47469c8
--- /dev/null
+++ b/be/src/thirdparty/mpfit/mpfit.h
@@ -0,0 +1,198 @@
+/* 
+ * MINPACK-1 Least Squares Fitting Library
+ *
+ * Original public domain version by B. Garbow, K. Hillstrom, J. More'
+ *   (Argonne National Laboratory, MINPACK project, March 1980)
+ * 
+ * Tranlation to C Language by S. Moshier (moshier.net)
+ * 
+ * Enhancements and packaging by C. Markwardt
+ *   (comparable to IDL fitting routine MPFIT
+ *    see http://cow.physics.wisc.edu/~craigm/idl/idl.html)
+ */
+
+/* Header file defining constants, data structures and functions of
+   mpfit library 
+   $Id: mpfit.h,v 1.16 2016/06/02 19:14:16 craigm Exp $
+*/
+
+#ifndef MPFIT_H
+#define MPFIT_H
+
+/* This is a C library.  Allow compilation with a C++ compiler */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* MPFIT version string */
+#define MPFIT_VERSION "1.3"
+
+/* Definition of a parameter constraint structure */
+struct mp_par_struct {
+  int fixed;        /* 1 = fixed; 0 = free */
+  int limited[2];   /* 1 = low/upper limit; 0 = no limit */
+  double limits[2]; /* lower/upper limit boundary value */
+
+  char *parname;    /* Name of parameter, or 0 for none */
+  double step;      /* Step size for finite difference */
+  double relstep;   /* Relative step size for finite difference */
+  int side;         /* Sidedness of finite difference derivative 
+                       0 - one-sided derivative computed automatically
+                       1 - one-sided derivative (f(x+h) - f(x)  )/h
+                      -1 - one-sided derivative (f(x)   - f(x-h))/h
+                       2 - two-sided derivative (f(x+h) - f(x-h))/(2*h) 
+                       3 - user-computed analytical derivatives
+                   */
+  int deriv_debug;  /* Derivative debug mode: 1 = Yes; 0 = No;
+
+                       If yes, compute both analytical and numerical
+                       derivatives and print them to the console for
+                       comparison.
+
+                      NOTE: when debugging, do *not* set side = 3,
+                      but rather to the kind of numerical derivative
+                      you want to compare the user-analytical one to
+                      (0, 1, -1, or 2).
+                   */
+  double deriv_reltol; /* Relative tolerance for derivative debug
+                         printout */
+  double deriv_abstol; /* Absolute tolerance for derivative debug
+                         printout */
+};
+
+/* Just a placeholder - do not use!! */
+typedef void (*mp_iterproc)(void);
+
+/* Definition of MPFIT configuration structure */
+struct mp_config_struct {
+  /* NOTE: the user may set the value explicitly; OR, if the passed
+     value is zero, then the "Default" value will be substituted by
+     mpfit(). */
+  double ftol;    /* Relative chi-square convergence criterium Default: 1e-10 
*/
+  double xtol;    /* Relative parameter convergence criterium  Default: 1e-10 
*/
+  double gtol;    /* Orthogonality convergence criterium       Default: 1e-10 
*/
+  double epsfcn;  /* Finite derivative step size               Default: 
MP_MACHEP0 */
+  double stepfactor; /* Initial step bound                     Default: 100.0 
*/
+  double covtol;  /* Range tolerance for covariance calculation Default: 1e-14 
*/
+  int maxiter;    /* Maximum number of iterations.  If maxiter == MP_NO_ITER,
+                     then basic error checking is done, and parameter
+                     errors/covariances are estimated based on input
+                     parameter values, but no fitting iterations are done. 
+                    Default: 200
+                 */
+#define MP_NO_ITER (-1) /* No iterations, just checking */
+  int maxfev;     /* Maximum number of function evaluations, or 0 for no limit
+                    Default: 0 (no limit) */
+  int nprint;     /* Default: 1 */
+  int douserscale;/* Scale variables by user values?
+                    1 = yes, user scale values in diag;
+                    0 = no, variables scaled internally (Default) */
+  int nofinitecheck; /* Disable check for infinite quantities from user?
+                       0 = do not perform check (Default)
+                       1 = perform check 
+                    */
+  mp_iterproc iterproc; /* Placeholder pointer - must set to 0 */
+
+};
+
+/* Definition of results structure, for when fit completes */
+struct mp_result_struct {
+  double bestnorm;     /* Final chi^2 */
+  double orignorm;     /* Starting value of chi^2 */
+  int niter;           /* Number of iterations */
+  int nfev;            /* Number of function evaluations */
+  int status;          /* Fitting status code */
+  
+  int npar;            /* Total number of parameters */
+  int nfree;           /* Number of free parameters */
+  int npegged;         /* Number of pegged parameters */
+  int nfunc;           /* Number of residuals (= num. of data points) */
+
+  double *resid;       /* Final residuals
+                         nfunc-vector, or 0 if not desired */
+  double *xerror;      /* Final parameter uncertainties (1-sigma)
+                         npar-vector, or 0 if not desired */
+  double *covar;       /* Final parameter covariance matrix
+                         npar x npar array, or 0 if not desired */
+  char version[20];    /* MPFIT version string */
+};  
+
+/* Convenience typedefs */  
+typedef struct mp_par_struct mp_par;
+typedef struct mp_config_struct mp_config;
+typedef struct mp_result_struct mp_result;
+
+/* Enforce type of fitting function */
+typedef int (*mp_func)(int m, /* Number of functions (elts of fvec) */
+                      int n, /* Number of variables (elts of x) */
+                      double *x,      /* I - Parameters */
+                      double *fvec,   /* O - function values */
+                      double **dvec,  /* O - function derivatives (optional)*/
+                      void *private_data); /* I/O - function private data*/
+
+/* Error codes */
+#define MP_ERR_INPUT (0)         /* General input parameter error */
+#define MP_ERR_NAN (-16)         /* User function produced non-finite values */
+#define MP_ERR_FUNC (-17)        /* No user function was supplied */
+#define MP_ERR_NPOINTS (-18)     /* No user data points were supplied */
+#define MP_ERR_NFREE (-19)       /* No free parameters */
+#define MP_ERR_MEMORY (-20)      /* Memory allocation error */
+#define MP_ERR_INITBOUNDS (-21)  /* Initial values inconsistent w constraints*/
+#define MP_ERR_BOUNDS (-22)      /* Initial constraints inconsistent */
+#define MP_ERR_PARAM (-23)       /* General input parameter error */
+#define MP_ERR_DOF (-24)         /* Not enough degrees of freedom */
+
+/* Potential success status codes */
+#define MP_OK_CHI (1)            /* Convergence in chi-square value */
+#define MP_OK_PAR (2)            /* Convergence in parameter value */
+#define MP_OK_BOTH (3)           /* Both MP_OK_PAR and MP_OK_CHI hold */
+#define MP_OK_DIR (4)            /* Convergence in orthogonality */
+#define MP_MAXITER (5)           /* Maximum number of iterations reached */
+#define MP_FTOL (6)              /* ftol is too small; no further improvement*/
+#define MP_XTOL (7)              /* xtol is too small; no further improvement*/
+#define MP_GTOL (8)              /* gtol is too small; no further improvement*/
+
+/* Double precision numeric constants */
+#define MP_MACHEP0 2.2204460e-16
+#define MP_DWARF   2.2250739e-308
+#define MP_GIANT   1.7976931e+308
+
+#if 0
+/* Float precision */
+#define MP_MACHEP0 1.19209e-07
+#define MP_DWARF   1.17549e-38
+#define MP_GIANT   3.40282e+38
+#endif
+
+#define MP_RDWARF  (sqrt(MP_DWARF*1.5)*10)
+#define MP_RGIANT  (sqrt(MP_GIANT)*0.1)
+
+
+/* External function prototype declarations */
+extern int mpfit(mp_func funct, int m, int npar,
+                double *xall, mp_par *pars, mp_config *config, 
+                void *private_data, 
+                mp_result *result);
+
+
+
+/* C99 uses isfinite() instead of finite() */
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+#define mpfinite(x) isfinite(x)
+
+/* Microsoft C uses _finite(x) instead of finite(x) */
+#elif defined(_MSC_VER) && _MSC_VER
+#include <float.h>
+#define mpfinite(x) _finite(x)
+
+/* Default is to assume that compiler/library has finite() function */
+#else
+#define mpfinite(x) finite(x)
+
+#endif
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* MPFIT_H */

http://git-wip-us.apache.org/repos/asf/impala/blob/0936e329/be/src/util/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/be/src/util/CMakeLists.txt b/be/src/util/CMakeLists.txt
index 08002ed..c9e0f3c 100644
--- a/be/src/util/CMakeLists.txt
+++ b/be/src/util/CMakeLists.txt
@@ -17,6 +17,7 @@
 
 set(SQUEASEL_SRC_DIR "${CMAKE_SOURCE_DIR}/be/src/thirdparty/squeasel")
 set(MUSTACHE_SRC_DIR "${CMAKE_SOURCE_DIR}/be/src/thirdparty/mustache")
+set(MPFIT_SRC_DIR "${CMAKE_SOURCE_DIR}/be/src/thirdparty/mpfit")
 
 # where to put generated libraries
 set(LIBRARY_OUTPUT_PATH "${BUILD_OUTPUT_ROOT_DIRECTORY}/util")
@@ -58,6 +59,7 @@ add_library(Util
   min-max-filter.cc
   min-max-filter-ir.cc
   minidump.cc
+  mpfit-util.cc
   network-util.cc
   openssl-util.cc
   os-info.cc
@@ -86,6 +88,7 @@ add_library(Util
   ${SQUEASEL_SRC_DIR}/squeasel.c
   webserver.cc
   ${MUSTACHE_SRC_DIR}/mustache.cc
+  ${MPFIT_SRC_DIR}/mpfit.c
 )
 add_dependencies(Util gen-deps gen_ir_descriptions)
 

http://git-wip-us.apache.org/repos/asf/impala/blob/0936e329/be/src/util/mpfit-util.cc
----------------------------------------------------------------------
diff --git a/be/src/util/mpfit-util.cc b/be/src/util/mpfit-util.cc
new file mode 100644
index 0000000..0f001ad
--- /dev/null
+++ b/be/src/util/mpfit-util.cc
@@ -0,0 +1,82 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "util/mpfit-util.h"
+
+#include <cstring>
+
+using namespace impala;
+using std::string;
+using std::function;
+
+ObjectiveFunction::ObjectiveFunction(string name, int num_params,
+    function<double (double, const double*)> fn)
+  : name_(name), num_params_(num_params), fn_(fn), num_points_(0), 
xs_(nullptr),
+    ys_(nullptr) {
+  DCHECK_GT(num_params, 0);
+}
+
+/// User-supplied function called by MpFit during curve fitting. MpFit 
requires this
+/// user-supplied function to compute "dy = y - fn(x)" for every known data 
point given
+/// the objective function 'fn' and the current list of parameter values 
'params'.
+/// Declared here so it can be used in ObjectiveFunction below.
+/// 'num_points' is the number of data points for fitting
+/// 'num_params' is the number of parameters in the objective function
+/// 'params' is an input array with 'num_params' parameter values
+/// 'dy' is an output parameter allocated by MpFit. It contains the "delta y" 
for each
+///      data point. This function sets dy[i] = y[i] - fn(x[i]) for each data 
point i and
+///      for the objective function 'fn'.
+/// 'obj_func' points to an ObjectiveFunction which contains the objective 
function
+///            and the data points
+/// The 'dvec' parameter is not important for our purposes. See the MpFit 
documentation.
+/// MpFit allows these user-supplied functions to indicate errors by returning 
a non-zero
+/// value. Our objective functions are simple and we always return 0.
+static int ComputeDeltaY(int num_points, int num_params, double* params, 
double* dy,
+    double** dvec, void* obj_func);
+
+bool ObjectiveFunction::LmsFit(const double* xs, const double* ys, int 
num_points) {
+  DCHECK(xs != nullptr);
+  DCHECK(ys != nullptr);
+  DCHECK_GE(num_points, 0);
+  num_points_ = num_points;
+  xs_ = xs;
+  ys_ = ys;
+
+  // Initialize function parameters.
+  params_.reset(new (std::nothrow) double[num_params_]);
+  if (params_ == nullptr) return false;
+  for (int i = 0; i < num_params_; ++i) params_.get()[i] = 1.0;
+
+  struct mp_config_struct config;
+  memset(&config, 0, sizeof(config));
+  // Maximum number of calls to SampledNdvMpFit(). Value determined 
empirically.
+  config.maxfev = 1000;
+  // Maximum number of fitting iterations. Value determined empirically.
+  config.maxiter = 200;
+  memset(&result_, 0, sizeof(result_));
+  int ret = mpfit(ComputeDeltaY, num_points, num_params_, params_.get(), 
nullptr,
+      &config, this, &result_);
+  // Any positive integer indicates success.
+  return ret > 0;
+}
+
+int ComputeDeltaY(int num_points, int num_params, double* params, double* dy,
+    double** dvec, void* obj_func) {
+  ObjectiveFunction* fn = reinterpret_cast<ObjectiveFunction*>(obj_func);
+  for (int i = 0; i < num_points; ++i) dy[i] = fn->GetDeltaY(i, params);
+  return 0;
+}

http://git-wip-us.apache.org/repos/asf/impala/blob/0936e329/be/src/util/mpfit-util.h
----------------------------------------------------------------------
diff --git a/be/src/util/mpfit-util.h b/be/src/util/mpfit-util.h
new file mode 100644
index 0000000..9eac3c2
--- /dev/null
+++ b/be/src/util/mpfit-util.h
@@ -0,0 +1,98 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+
+#ifndef IMPALA_MPFIT_UTIL_H
+#define IMPALA_MPFIT_UTIL_H
+
+#include <functional>
+#include <memory>
+#include <string>
+
+#include "common/logging.h"
+#include "common/status.h"
+#include "thirdparty/mpfit/mpfit.h"
+
+namespace impala {
+
+/// Objective function to be fit using the MpFit library. An objective 
function has
+/// exactly one 'x' variable and any number of additional non-variable 
parameters whose
+/// values are unknown. The purpose of curve fitting is to find the best 
values for those
+/// parameters given an objective function and a series of x/y data points.
+/// This class contains the objective function as well as the data points 
because that's
+/// the most natural setup for the MpFit API.
+/// Calling LmsFit() determines the parameters for the objective function.
+/// Calling GetY() computes the 'y' value for a given 'x' using the function 
parameters.
+/// The objective function is of type function<double (double, const double*)>
+/// By convention, the first argument is the value of the 'x' variable and the 
second
+/// argument is an array of function parameters which are determined during 
fitting.
+class ObjectiveFunction {
+ public:
+  ObjectiveFunction(std::string name, int num_params,
+      std::function<double (double, const double*)> fn);
+
+  /// Performs least mean squares (LMS) curve fitting using the MpFit library
+  /// against the provided x/y data points.
+  /// Returns true if fitting was successful, false otherwise.
+  bool LmsFit(const double* xs, const double* ys, int num_points) 
WARN_UNUSED_RESULT;
+
+  /// Evaluates the objective function over the given 'x' value.
+  double GetY(int64_t x) const {
+    DCHECK(params_ != nullptr);
+    return fn_(x, params_.get());
+  }
+
+  /// Returns the difference between the y value of data point 'pidx' and the
+  /// y value of the objective function with the given parameters over the x 
value
+  /// of the same point.
+  double GetDeltaY(int pidx, const double* params) const {
+    DCHECK_LT(pidx, num_points_);
+    return ys_[pidx] - fn_(xs_[pidx], params);
+  }
+
+  /// Returns the Chi-Square of fitting. This is an indication of how well the 
function
+  /// fits. Lower is better. Valid to call after LmsFit().
+  double GetError() const {
+    DCHECK(params_ != nullptr);
+    return result_.bestnorm;
+  }
+
+ private:
+  /// Human-readable name of this function. Used for debugging.
+  std::string name_;
+
+  /// Function parameters to be determined by fitting.
+  const int num_params_;
+  std::unique_ptr<double> params_;
+
+  /// MPFit result structure. Populated by in LmsFit(). All pointers in this 
structure
+  /// are optional and must be allocated and owned by the caller of mpfit(). 
Passing
+  /// nullptr indicates to MPFit that those fields should not be populated.
+  mp_result result_;
+
+  /// Objective function whose parameters should be fit to the data points.
+  std::function<double (double, const double*)> fn_;
+
+  /// Known x/y data points. Memory not owned.
+  int num_points_;
+  const double* xs_;
+  const double* ys_;
+};
+
+}
+
+#endif

http://git-wip-us.apache.org/repos/asf/impala/blob/0936e329/bin/rat_exclude_files.txt
----------------------------------------------------------------------
diff --git a/bin/rat_exclude_files.txt b/bin/rat_exclude_files.txt
index 73cd073..fdb1926 100644
--- a/bin/rat_exclude_files.txt
+++ b/bin/rat_exclude_files.txt
@@ -19,8 +19,9 @@ testdata/__init__.py
 tests/__init__.py
 www/index.html
 
-# See LICENSE.txt
+# See $IMPALA_HOME/LICENSE.txt
 be/src/gutil/*
+be/src/thirdparty/mpfit/*
 be/src/kudu/gutil
 www/highlight/*
 www/DataTables*/*

http://git-wip-us.apache.org/repos/asf/impala/blob/0936e329/bin/run_clang_tidy.sh
----------------------------------------------------------------------
diff --git a/bin/run_clang_tidy.sh b/bin/run_clang_tidy.sh
index ae389a9..e879b35 100755
--- a/bin/run_clang_tidy.sh
+++ b/bin/run_clang_tidy.sh
@@ -35,7 +35,10 @@ then
   echo "WARNING: compile failed" >&2
 fi
 
-DIRS=$(ls -d "${IMPALA_HOME}/be/src/"*/ | grep -v gutil | grep -v kudu | tr 
'\n' ' ')
+DIRS=$(ls -d "${IMPALA_HOME}/be/src/"*/ | grep -v gutil | grep -v kudu |\
+  grep -v thirdparty | tr '\n' ' ')
+# Include/exclude select thirdparty dirs.
+DIRS=$DIRS$(ls -d "${IMPALA_HOME}/be/src/thirdparty/"*/ | grep -v mpfit | tr 
'\n' ' ')
 PIPE_DIRS=$(echo "${DIRS}" | tr ' ' '|')
 
 # Reduce the concurrency to one less than the number of cores in the system. 
Note than

http://git-wip-us.apache.org/repos/asf/impala/blob/0936e329/fe/src/main/java/org/apache/impala/analysis/FunctionCallExpr.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/analysis/FunctionCallExpr.java 
b/fe/src/main/java/org/apache/impala/analysis/FunctionCallExpr.java
index 9574a02..12a34f7 100644
--- a/fe/src/main/java/org/apache/impala/analysis/FunctionCallExpr.java
+++ b/fe/src/main/java/org/apache/impala/analysis/FunctionCallExpr.java
@@ -469,8 +469,6 @@ public class FunctionCallExpr extends Expr {
       return;
     }
 
-    Type[] argTypes = collectChildReturnTypes();
-
     // User needs DB access.
     Db db = analyzer.getDb(fnName_.getDb(), Privilege.VIEW_METADATA, true);
     if (!db.containsFunction(fnName_.getFunction())) {
@@ -482,8 +480,7 @@ public class FunctionCallExpr extends Expr {
       // There is no version of COUNT() that takes more than 1 argument but 
after
       // the rewrite, we only need count(*).
       // TODO: fix how we rewrite count distinct.
-      argTypes = new Type[0];
-      Function searchDesc = new Function(fnName_, argTypes, Type.INVALID, 
false);
+      Function searchDesc = new Function(fnName_, new Type[0], Type.INVALID, 
false);
       fn_ = db.getFunction(searchDesc, 
Function.CompareMode.IS_NONSTRICT_SUPERTYPE_OF);
       type_ = fn_.getReturnType();
       // Make sure BE doesn't see any TYPE_NULL exprs
@@ -506,6 +503,28 @@ public class FunctionCallExpr extends Expr {
           "AVG requires a numeric or timestamp parameter: " + toSql());
     }
 
+    // SAMPLED_NDV() is only valid with two children. Invocations with an 
invalid number
+    // of children are gracefully handled when resolving the function 
signature.
+    if (fnName_.getFunction().equalsIgnoreCase("sampled_ndv")
+        && children_.size() == 2) {
+      if (!(children_.get(1) instanceof NumericLiteral)) {
+        throw new AnalysisException(
+            "Second parameter of SAMPLED_NDV() must be a numeric literal in 
[0,1]: " +
+            children_.get(1).toSql());
+      }
+      NumericLiteral samplePerc = (NumericLiteral) children_.get(1);
+      if (samplePerc.getDoubleValue() < 0 || samplePerc.getDoubleValue() > 
1.0) {
+        throw new AnalysisException(
+            "Second parameter of SAMPLED_NDV() must be a numeric literal in 
[0,1]: " +
+            samplePerc.toSql());
+      }
+      // Numeric literals with a decimal point are analyzed as decimals. 
Without this
+      // cast we might resolve to the wrong function because there is no 
exactly
+      // matching signature with decimal as the second argument.
+      children_.set(1, samplePerc.uncheckedCastTo(Type.DOUBLE));
+    }
+
+    Type[] argTypes = collectChildReturnTypes();
     Function searchDesc = new Function(fnName_, argTypes, Type.INVALID, false);
     fn_ = db.getFunction(searchDesc, 
Function.CompareMode.IS_NONSTRICT_SUPERTYPE_OF);
     if (fn_ == null || (!isInternalFnCall_ && !fn_.userVisible())) {

Reply via email to