Hi,

First, I wanted to thank everybody who helped me to clarify many points concerning memory layout of numpy arrays; I think I have a much clearer idea of the way numpy arrays behave at the C level. I've used all those informations to correct my initial implementation of clip to improve the clip function for common cases: it speeds up things only for native endianness, and scalar min and max (both contiguous and non contiguous cases). I've attached the new version code (only for one type to avoid too big emails; you have to dl the archive to actually compile the implementation); the whole package with tests + profiling script is there:

   http://www.ar.media.kyoto-u.ac.jp/members/david/archives/fastclip.tgz

If this looks Ok, I will prepare a patch against current numpy, with the C sources being generated by numpy.distutils instead of the tool I am using now (autogen)

Now, to improve other cases (mainly implementing an in-place clip function + non scalar min/max), there are some clarifications needed, mainly related to broadcast rules the current clip implementation which seems to break numpy conventions:

1: the old implementation returns an array which has the same endianness than the input array. This is a bit odd, because when the input is byte swapped, the returned array is still byte swapped, which seems to be against numpy convention. Here is some code which seem odd to me (code assumes little endian machine)

a = numpy.random.randn(3, 2)
b = a.astype(a.dtype.newbyteorder('>'))
c = b.copy()
assert a.dtype.isnative
assert not b.dtype.isnative
assert not c.dtype.isnative
# Endianness behaviour of basic operation with numpy arrays
print (a + b).dtype.isnative #one arg is non native -> returns native
print (b + c).dtype.isnative # both args not native -> returns native
# Now, what's happening endian-wise with clip:
print numpy.clip(a, -0.5, 0.5).dtype.isnative # everything native -> returns native print numpy.clip(b, -0.5, 0.5).dtype.isnative # input array non native -> returns non native print numpy.clip(b, a, 0.5).dtype.isnative # input array non native, native array min -> returns native

The fact that the output's endianness depends on min/max arguments being arrays or not does not seem really coherent ?

2: the old implementation does not upcast the input array. If the input is int32, and min/max are float32, the function fails; if input is float32, and min/max float64, the output is still float32. Again, this seems against the expected numpy behaviour ? 3: the old implementation supports clipping with complex arrays. I don't see any obvious meaningful implementation of clipping in those cases (using the module to compare them ?)

If breaking those oddities is allowed, this would make the improvements much simpler to code,

  cheers,

  David
#include "Python.h"
#include "structmember.h"

#include "numpy/arrayobject.h"

#include "clip_imp.c"

#define _ARET(x) PyArray_Return((PyArrayObject *)(x))
#define GET_REF_COUNT(x) ( ((PyObject*)(x))->ob_refcnt )

/* function callable from python */
static PyObject* PyArray_MyClip_GlobalMeth(PyObject *dummy, PyObject *args);
static PyObject *array_my_clip(PyArrayObject *self, PyObject *args, PyObject *kwds);

/* function NOT supposed to be callable from python */
static PyObject* PyArray_FastClip(PyArrayObject *input, PyObject *min, 
        PyObject *max, PyArrayObject *out);
static PyObject* PyArray_NumericFastClip(PyArrayObject *in, PyObject *min, 
        PyObject *max, PyArrayObject *out);

/* Not python specific functions */

static PyMethodDef mymethods[] = {
    {"myclip", PyArray_MyClip_GlobalMeth, METH_VARARGS, NULL}, 
    {"mykwclip", (PyCFunction)array_my_clip, METH_VARARGS | METH_KEYWORDS, NULL}, 
    {NULL, NULL, 0, NULL} /* Sentinel */
};

PyMODINIT_FUNC init_fast_clip(void);

PyMODINIT_FUNC
init_fast_clip(void)
{
    (void)Py_InitModule("_fast_clip", mymethods);
    import_array();
}

static PyObject *
array_my_clip(PyArrayObject *self, PyObject *args, PyObject *kwds)
{
    PyObject *min, *max;
    PyArrayObject *out=NULL;
    static char *kwlist[] = {"in", "min", "max", "out", NULL};

    if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&OOO&", kwlist,
                        PyArray_OutputConverter, &self, 
                        &min, &max, PyArray_OutputConverter, &out)) {
        return NULL;
    }

    return _ARET(PyArray_FastClip(self, min, max, out));
}


/*
 * expect 3 inputs in args: the input array, minvalue, maxvalue. input array
 * expected to be con vertible to array, minval and maxval convertibale to 
 * scalars.
 *
 * Only handles native double for now. For all other cases, pass down to
 * numpy clip func.
 */
static PyObject* PyArray_MyClip_GlobalMeth(PyObject *dummy, PyObject *args)
{
    PyObject        *in_o, *min_o, *max_o;
    (void)dummy;

    if (!PyArg_ParseTuple(args, "O!OO", &PyArray_Type, &in_o, &min_o, &max_o)) {
        return NULL;
    }

    return PyArray_FastClip((PyArrayObject*)in_o, min_o, max_o, (PyArrayObject*)NULL);
}

/*
 * Behaviour of old clip:
 *  - Endianness wise:
 *      - returns an array which has the same endianness 
 *      than input
 *      - if min and/or max has non native endianness, still 
 *      returns input's endianness
 *  - If out is NULL, does a copy  of input. Otherwise, use 
 *  the given out (may fail in this case).
 *  - If min and/or max is not scalar, it has to be the same 
 *  shape than input.
 *
 *  Problems :
 *  - Complex case ?
 *  - if input type is "smaller" than min or max ? current clip 
 *  does not upcast, we do.
 *
 */
static PyObject* PyArray_FastClip(PyArrayObject *in, PyObject *min, 
        PyObject *max, PyArrayObject *out)
{
    int is_num, typenum;

    PyObject*  tmp;
     
    /* Get common type, used for working buffer, and endianness of input */
    typenum = 0;
    typenum = PyArray_ObjectType((PyObject*)in, typenum);
    typenum = PyArray_ObjectType((PyObject*)min, typenum);
    typenum = PyArray_ObjectType((PyObject*)max, typenum);

    is_num  = PyTypeNum_ISNUMBER(typenum);

    if(is_num) {
        /* 
         * Numeric case
        */
        tmp     = PyArray_NumericFastClip(in, min, max, out);
        if (tmp == NULL) {
            goto fail;
        }
    } else {
        /*
         * Non Numeric case (can current clip really handle those ?)
         */

        /* TODO: 
         *  - check that I got the refcount right here with PyArray_Clip
         *  - custom implementation 
         */
        tmp     = PyArray_Clip(in, min, max, out);
        if (tmp == NULL) {
            goto fail;
        }
    }

    return tmp;

fail:
    return NULL;
}

/*
 * Before expanding this function, we need to clarify some cases:
 *  - if any input is not native endianness, what to do ?
 *  - if all input does not have same type, what to do ?
 *  - complex case: what to do ?
 *  - why not having a inplace clip ?
 */
static PyObject* PyArray_NumericFastClip(PyArrayObject *in, PyObject *min, 
        PyObject *max, PyArrayObject *out)
{
    PyArray_Descr   *ndescr;
    PyArrayObject   *min_a, *max_a, *w_a;
    PyObject        *ret;
    
    int is_scalar, is_in_native, is_in_aligned, is_real;
    int typenum, st, flags;

    is_in_native    = PyArray_ISNOTSWAPPED(in);
    is_in_aligned   = PyArray_ISALIGNED(in);

    typenum = 0;
    typenum = PyArray_ObjectType((PyObject*)in, typenum);
    typenum = PyArray_ObjectType((PyObject*)min, typenum);
    typenum = PyArray_ObjectType((PyObject*)max, typenum);

    /* 
     * Get min and max as numpy arrays. If not scalar, check that they have
     * compatible shape
     *
     * This should be put after working buffer creation once we implement
     * array min/max... The cleaning has to be changed, then
     */
    min_a   = (PyArrayObject *)
        PyArray_FromObject(min, typenum, 0, 0);
    if (min_a == NULL) {
        PyErr_SetString(PyExc_TypeError,
                        "Error while converting min to value,"\
                        " sorry");
        //goto clean_in_a;
        goto fail;
    }
    is_scalar   = PyArray_CheckScalar(min_a);
    if (!is_scalar) {
        /*
         * Check that same shape
         */
    }

    max_a   = (PyArrayObject *)
        PyArray_ContiguousFromAny(max, typenum, 0, 0);
    if (max_a == NULL) {
        PyErr_SetString(PyExc_TypeError,
                        "Error while converting max to value,"\
                        " sorry");
        goto clean_min_a;
    }
    is_scalar   = PyArray_CheckScalar(max_a);
    if (!is_scalar) {
        /*
         * Check that same shape
         */
    }
    is_scalar   = PyArray_CheckScalar(min_a) && PyArray_CheckScalar(max_a);
    is_real     = !PyTypeNum_ISCOMPLEX(typenum);
    
    /*
     * Now, check wether we need to create a working array, or if we can
     * use an existing one (input or output)
     */
    if (out == NULL && is_in_native && is_in_aligned && is_scalar && is_real) {
        /*
         * Create a working buffer of type typenum
         */
        ndescr  = PyArray_DescrFromType(typenum);
        if (ndescr == NULL) {
            goto fail;
        }

        /* 
         * Creating a working array; as it is a copy, we can ask all nice
         * properties
         */
        flags   = NPY_ENSURECOPY | NPY_IN_ARRAY | NPY_CONTIGUOUS | NPY_NOTSWAPPED;
        //flags   = NPY_IN_ARRAY | NPY_CONTIGUOUS | NPY_NOTSWAPPED;
        Py_INCREF(ndescr);
        w_a     = (PyArrayObject*)PyArray_FromArray(in, ndescr, flags);
        if (w_a  == NULL) {
            //Py_XDECREF(ndescr);
            goto clean_ndescr;
        } 
    } else {
        /*
         * out != NULL: For now, does not handle this case, 
         * pass it to old implementation.
         *
         * Before handling this case, we have to check that out is what we
         * want.
         */
        ret = PyArray_Clip(in, min, max, out);
        if (ret == NULL) {
            goto fail;
        }

        return ret;
    }

    /*
     * Now, we got the working buffer w_a
     */
    //is_w_native     = PyArray_ISNOTSWAPPED(w_a);

    /*
     * Now, we have all necessary information to call the correct 
     * implementation * depending on arguments type, shape, etc...
     *
     * Case to differentiate:
     *  - scalar min/max: fast implementation.
     *  - non scalar min/max: pass back to old implementation for now.
     */
    if (is_scalar) {
        /*
         * Numeric, scalar case
         */
        st  = numeric_native_scalar_generic_clip(w_a, min_a, max_a);
        if (st != 0 ) {
            goto clean_in_a;
        }
        ret = (PyObject*)w_a; 
    } else {
        /*
         * Numeric, non scalar case 
         * Numeric, scalar case, non native input
         */
        ret     = PyArray_Clip(w_a, min, max, NULL);
        if (ret == NULL) {
            goto clean_max_a;
        }
        Py_XDECREF(w_a);
        //fprintf(stderr, "%s:%s, line %d: in (%d), min (%d), 
        //          max (%d), out (%d)\n",
        //         __FILE__, __func__, __LINE__, 
        //         GET_REF_COUNT(in_a), GET_REF_COUNT(min_a), 
        //         GET_REF_COUNT(max_a), GET_REF_COUNT(out));
    }
    
    /*
     * We're done, clean everything
     */
    Py_XDECREF(ndescr);
    Py_XDECREF(max_a);
    Py_XDECREF(min_a);

    return ret;

clean_in_a:
    Py_XDECREF(w_a);
clean_ndescr:
    Py_XDECREF(ndescr);
clean_max_a:
    Py_XDECREF(max_a);
clean_min_a:
    Py_XDECREF(min_a);
fail:
    return NULL;
}
/*
 * Last Change: Sun Jan 14 07:00 PM 2007 J
 * 
 * vim:syntax=c
 */


/* 
 * (npy_double version) Clip in-place an array with scalars min and max
 * (native endianness), contiguous array  or not.
 *
 */
static int numeric_native_scalar_dbl_clip(
        PyArrayObject* in, 
        npy_double min, 
        npy_double max) 
{
    int                     st;
    npy_intp                ni, i;
    npy_double curel, *curptr;

    PyArrayIterObject   *iter;

    if (PyArray_ISCONTIGUOUS(in) || PyArray_ISFORTRAN(in)) {
        /*
         * Special case where we can easily iterate over all
         * items of the input in a C-way (eg using standard array indexing)
         */
        ni      = PyArray_Size((PyObject*)in);
        curptr  = (npy_double *)in->data;
        for (i = 0; i < ni; ++i) {  
            if (curptr[i] < min) {
                curptr[i]   = min;
            } else if (curptr[i] > max) {
                curptr[i]   = max;
            }
        }
    } else {
        /*
         * Generic case.
         */
        iter    = (PyArrayIterObject*)PyArray_IterNew((PyObject*)in);
        if (iter == NULL) {
            st = -1;
            goto fail;
        }
        
        while(iter->index < iter->size) {
            curel   = ((npy_double*) iter->dataptr)[0];
            if (curel < min) {
                curel   = min;
            } else if (curel > max) {
                curel   = max;
            }
            ((npy_double*) iter->dataptr)[0] = curel;
            PyArray_ITER_NEXT(iter);
        }

        Py_DECREF(iter);
    }

    return 0;

fail:
    return st;
}

/* This function does the dispatching based on input type */
/*
 * Expect 
 *  - input, min and max to be same typenum
 *  - min and max are scalar arrays
 *  - input is writeable
 *  - input, min and max are native endianness
 *  - input, min and max are aligned
 *
 * Those requirements are checked in debug mode only
 */
static int numeric_native_scalar_generic_clip(PyArrayObject* input, 
    PyArrayObject* min, PyArrayObject *max)
{
    int         typenum, st;

    /* 
     * Check we have what we expect for type
     */
    typenum = input->descr->type_num;
    if(typenum != min->descr->type_num 
        || typenum != max->descr->type_num) {
        PyErr_SetString(PyExc_TypeError,
                        "BUG: expected all objs to be same type");
        goto fail;
    }

    /*
     * Checking requirements (IN DEBUG MODE ONLY)
     */
    assert (PyArray_ISALIGNED(input));
    assert (PyArray_ISWRITEABLE(input));
    assert (PyArray_ISNOTSWAPPED(input));

    assert (PyArray_ISALIGNED(min));
    assert (PyArray_ISNOTSWAPPED(min));
    assert (PyArray_CheckScalar(min));

    assert (PyArray_ISALIGNED(max));
    assert (PyArray_ISNOTSWAPPED(max));
    assert (PyArray_CheckScalar(max));

    /*
     * calling type specific implementation
     */
    switch (typenum) {
        /* 
         * Take care to convert data pointer to correct pointer type before
         * indexing ! 
         */
        case NPY_DOUBLE:
            st  = numeric_native_scalar_dbl_clip(
                    input, 
                    ((npy_double*)min->data)[0], 
                    ((npy_double*)max->data)[0]); 
            break;
        
        default:
            PyErr_SetString(PyExc_TypeError,
                            "type is not supported yet,"\
                            " sorry");
            goto fail;
            break;
    }
    return st;

fail:
    return -1;
}

_______________________________________________
Numpy-discussion mailing list
[email protected]
http://projects.scipy.org/mailman/listinfo/numpy-discussion

Reply via email to