isn't this related to http://projects.scipy.org/numpy/ticket/626 percentile() and clamp()
which was set to invalid -Sebastian On Sun, May 16, 2010 at 12:11 AM, <[email protected]> wrote: > Author: oliphant > Date: 2010-05-15 17:11:10 -0500 (Sat, 15 May 2010) > New Revision: 8413 > > Modified: > trunk/numpy/lib/function_base.py > Log: > Add percentile function. > > Modified: trunk/numpy/lib/function_base.py > =================================================================== > --- trunk/numpy/lib/function_base.py 2010-05-13 12:47:23 UTC (rev 8412) > +++ trunk/numpy/lib/function_base.py 2010-05-15 22:11:10 UTC (rev 8413) > @@ -1,6 +1,6 @@ > __docformat__ = "restructuredtext en" > __all__ = ['select', 'piecewise', 'trim_zeros', > - 'copy', 'iterable', > + 'copy', 'iterable', 'percentile', > 'diff', 'gradient', 'angle', 'unwrap', 'sort_complex', 'disp', > 'extract', 'place', 'nansum', 'nanmax', 'nanargmax', > 'nanargmin', 'nanmin', 'vectorize', 'asarray_chkfinite', 'average', > @@ -2804,7 +2804,7 @@ > > See Also > -------- > - mean > + mean, percentile > > Notes > ----- > @@ -2863,6 +2863,132 @@ > # and check, use out array. > return mean(sorted[indexer], axis=axis, out=out) > > +def percentile(a, q, axis=None, out=None, overwrite_input=False): > + """ > + Compute the qth percentile of the data along the specified axis. > + > + Returns the qth percentile of the array elements. > + > + Parameters > + ---------- > + a : array_like > + Input array or object that can be converted to an array. > + q : float in range of [0,100] (or sequence of floats) > + percentile to compute which must be between 0 and 100 inclusive > + axis : {None, int}, optional > + Axis along which the percentiles are computed. The default > (axis=None) > + is to compute the median along a flattened version of the array. > + out : ndarray, optional > + Alternative output array in which to place the result. It must > + have the same shape and buffer length as the expected output, > + but the type (of the output) will be cast if necessary. > + overwrite_input : {False, True}, optional > + If True, then allow use of memory of input array (a) for > + calculations. The input array will be modified by the call to > + median. This will save memory when you do not need to preserve > + the contents of the input array. Treat the input as undefined, > + but it will probably be fully or partially sorted. Default is > + False. Note that, if `overwrite_input` is True and the input > + is not already an ndarray, an error will be raised. > + > + Returns > + ------- > + pcntile : ndarray > + A new array holding the result (unless `out` is specified, in > + which case that array is returned instead). If the input contains > + integers, or floats of smaller precision than 64, then the output > + data-type is float64. Otherwise, the output data-type is the same > + as that of the input. > + > + See Also > + -------- > + mean, median > + > + Notes > + ----- > + Given a vector V of length N, the qth percentile of V is the qth ranked > + value in a sorted copy of V. A weighted average of the two nearest > neighbors > + is used if the normalized ranking does not match q exactly. > + The same as the median if q is 0.5; the same as the min if q is 0; > + and the same as the max if q is 1 > + > + Examples > + -------- > + >>> a = np.array([[10, 7, 4], [3, 2, 1]]) > + >>> a > + array([[10, 7, 4], > + [ 3, 2, 1]]) > + >>> np.percentile(a, 0.5) > + 3.5 > + >>> np.percentile(a, 0.5, axis=0) > + array([ 6.5, 4.5, 2.5]) > + >>> np.percentile(a, 0.5, axis=1) > + array([ 7., 2.]) > + >>> m = np.percentile(a, 0.5, axis=0) > + >>> out = np.zeros_like(m) > + >>> np.percentile(a, 0.5, axis=0, out=m) > + array([ 6.5, 4.5, 2.5]) > + >>> m > + array([ 6.5, 4.5, 2.5]) > + >>> b = a.copy() > + >>> np.percentile(b, 0.5, axis=1, overwrite_input=True) > + array([ 7., 2.]) > + >>> assert not np.all(a==b) > + >>> b = a.copy() > + >>> np.percentile(b, 0.5, axis=None, overwrite_input=True) > + 3.5 > + >>> assert not np.all(a==b) > + > + """ > + if q == 0: > + return a.min(axis=axis, out=out) > + elif q == 100: > + return a.max(axis=axis, out=out) > + > + if overwrite_input: > + if axis is None: > + sorted = a.ravel() > + sorted.sort() > + else: > + a.sort(axis=axis) > + sorted = a > + else: > + sorted = sort(a, axis=axis) > + if axis is None: > + axis = 0 > + > + return _compute_qth_percentile(sorted, q, axis, out) > + > +# handle sequence of q's without calling sort multiple times > +def _compute_qth_percentile(sorted, q, axis, out): > + if not isscalar(q): > + return [_compute_qth_percentile(sorted, qi, axis, out) > + for qi in q] > + q = q / 100.0 > + if (q < 0) or (q > 1): > + raise ValueError, "percentile must be either in the range [0,100]" > + > + indexer = [slice(None)] * sorted.ndim > + Nx = sorted.shape[axis] > + index = q*(Nx-1) > + i = int(index) > + if i == index: > + indexer[axis] = slice(i, i+1) > + weights = array(1) > + sumval = 1.0 > + else: > + indexer[axis] = slice(i, i+2) > + j = i + 1 > + weights = array([(j - index), (index - i)],float) > + wshape = [1]*sorted.ndim > + wshape[axis] = 2 > + weights.shape = wshape > + sumval = weights.sum() > + > + # Use add.reduce in both cases to coerce data type as well as > + # check and use out array. > + return add.reduce(sorted[indexer]*weights, axis=axis, out=out)/sumval > + > def trapz(y, x=None, dx=1.0, axis=-1): > """ > Integrate along the given axis using the composite trapezoidal rule. > > _______________________________________________ > Numpy-svn mailing list > [email protected] > http://mail.scipy.org/mailman/listinfo/numpy-svn > _______________________________________________ NumPy-Discussion mailing list [email protected] http://mail.scipy.org/mailman/listinfo/numpy-discussion
