David Cournapeau wrote:
On 8/17/07, Eric Firing <[EMAIL PROTECTED]> wrote:
In looking at maskedarray performance, I found that the filled()
function or method is a bottleneck. I think it can be sped up by using
putmask instead of indexed assignment, but I found that putmask itself
is slower than it needs to be. So I followed David Cournapeau's example
of fastclip and made a similar fastputmask. The diff relative to
current svn (3967) is attached.
Great! putmask was actually the function I wanted to improve after
clip, because it is the second bottleneck for matplotlib imagesc :) I
would not be surprised if imagesc now has decent speed compared to
MATLAB.
I hope someone will take a look and either tell me what I need to fix or
commit it as-is.
It looks like there are a lot of spurious diffs in your patch (space vs
tab, or end-of-line problems?). Could you regenerate a patch without
them, since half of the patch is "garbage"? It would be much easier
to see the changes you actually made.
Agreed. This is because my editor deletes spurious whitespace that was
already in the file. If I ruled the world, the spurious whitespace and
hard tabs would never be there in the first place. (If I were younger I
might use smileys in places like this, but they just don't come
naturally to me.) As far as I can see there is no way of using svn diff
to deal with this automatically, so in the attached revision I have
manually removed chunks resulting solely from whitespace. Some of the
remaining chunks unfortunately have a mixture of whitespace and
substantive differences. And manually removing chunks is risky.
Is there a better way to handle this problem? A better way to make
diffs? Or any possibility of routinely cleaning the junk out of the svn
source files? (Yes, I know--what is junk to me probably results from
what others consider good behavior of the editor.)
Eric
cheers,
David
_______________________________________________
Numpy-discussion mailing list
Numpy-discussion@scipy.org
http://projects.scipy.org/mailman/listinfo/numpy-discussion
Index: numpy/core/include/numpy/ndarrayobject.h
===================================================================
--- numpy/core/include/numpy/ndarrayobject.h (revision 3964)
+++ numpy/core/include/numpy/ndarrayobject.h (working copy)
@@ -1049,6 +1049,8 @@
typedef void (PyArray_FastClipFunc)(void *in, npy_intp n_in, void *min,
void *max, void *out);
+typedef void (PyArray_FastPutmaskFunc)(void *in, void *mask, npy_intp n_in,
+ void *values, npy_intp nv);
typedef struct {
npy_intp *ptr;
@@ -1126,6 +1128,7 @@
int *cancastto;
PyArray_FastClipFunc *fastclip;
+ PyArray_FastPutmaskFunc *fastputmask;
} PyArray_ArrFuncs;
#define NPY_ITEM_REFCOUNT 0x01 /* The item must be reference counted
Index: numpy/core/src/multiarraymodule.c
===================================================================
--- numpy/core/src/multiarraymodule.c (revision 3964)
+++ numpy/core/src/multiarraymodule.c (working copy)
@@ -4065,6 +4065,7 @@
static PyObject *
PyArray_PutMask(PyArrayObject *self, PyObject* values0, PyObject* mask0)
{
+ PyArray_FastPutmaskFunc *func;
PyArrayObject *mask, *values;
int i, chunk, ni, max_item, nv, tmp;
char *src, *dest;
@@ -4094,45 +4095,51 @@
chunk = self->descr->elsize;
mask = (PyArrayObject *)\
- PyArray_FROM_OTF(mask0, PyArray_BOOL, CARRAY | FORCECAST);
- if (mask == NULL) goto fail;
+ PyArray_FROM_OTF(mask0, PyArray_BOOL, CARRAY | FORCECAST);
+ if (mask == NULL) goto fail;
ni = PyArray_SIZE(mask);
if (ni != max_item) {
PyErr_SetString(PyExc_ValueError,
- "putmask: mask and data must be "\
- "the same size");
+ "putmask: mask and data must be "\
+ "the same size");
goto fail;
}
values = (PyArrayObject *)\
- PyArray_ContiguousFromAny(values0, self->descr->type_num, 0, 0);
- if (values == NULL) goto fail;
- nv = PyArray_SIZE(values); /* zero if null array */
+ PyArray_ContiguousFromAny(values0, self->descr->type_num, 0, 0);
+ if (values == NULL) goto fail;
+ nv = PyArray_SIZE(values); /* zero if null array */
if (nv <= 0) {
Py_XDECREF(values);
Py_XDECREF(mask);
Py_INCREF(Py_None);
return Py_None;
}
- if (nv > 0) {
- if (PyDataType_REFCHK(self->descr)) {
+ if (PyDataType_REFCHK(self->descr)) {
+ for(i=0; i<ni; i++) {
+ tmp = ((Bool *)(mask->data))[i];
+ if (tmp) {
+ src = values->data + chunk * (i % nv);
+ PyArray_Item_INCREF(src, self->descr);
+ PyArray_Item_XDECREF(dest+i*chunk, self->descr);
+ memmove(dest + i * chunk, src, chunk);
+ }
+ }
+ }
+ else {
+ func = self->descr->f->fastputmask;
+ if (func == NULL) {
for(i=0; i<ni; i++) {
- src = values->data + chunk * (i % nv);
tmp = ((Bool *)(mask->data))[i];
if (tmp) {
- PyArray_Item_INCREF(src, self->descr);
- PyArray_Item_XDECREF(dest+i*chunk, self->descr);
+ src = values->data + chunk * (i % nv);
memmove(dest + i * chunk, src, chunk);
}
- }
+ }
}
else {
- for(i=0; i<ni; i++) {
- src = values->data + chunk * (i % nv);
- tmp = ((Bool *)(mask->data))[i];
- if (tmp) memmove(dest + i * chunk, src, chunk);
- }
- }
+ func(dest, mask->data, ni, values->data, nv);
+ }
}
Py_XDECREF(values);
Index: numpy/core/src/arraytypes.inc.src
===================================================================
--- numpy/core/src/arraytypes.inc.src (revision 3964)
+++ numpy/core/src/arraytypes.inc.src (working copy)
@@ -2044,62 +2044,98 @@
-/************************
- * Fast clip functions
- *************************/
-
-/**begin repeat
-#name=BOOL,BYTE, UBYTE, SHORT, USHORT, INT, UINT, LONG, ULONG, LONGLONG, ULONGLONG, FLOAT, DOUBLE, LONGDOUBLE#
-#type= Bool, byte, ubyte, short, ushort, int, uint, long, ulong, longlong, ulonglong, float, double, longdouble#
-*/
+/************************
+ * Fast clip functions
+ *************************/
+
+/**begin repeat
+#name=BOOL,BYTE, UBYTE, SHORT, USHORT, INT, UINT, LONG, ULONG, LONGLONG, ULONGLONG, FLOAT, DOUBLE, LONGDOUBLE#
+#type= Bool, byte, ubyte, short, ushort, int, uint, long, ulong, longlong, ulonglong, float, double, longdouble#
+*/
static void
@name@_fastclip(@type@ *in, intp ni, @type@ *min, @type@ *max, @type@ *out)
-{
- register npy_intp i;
+{
+ register npy_intp i;
@type@ max_val, min_val;
-
+
max_val = *max;
min_val = *min;
-
- for (i = 0; i < ni; i++) {
- if (in[i] < min_val) {
- out[i] = min_val;
- } else if (in[i] > max_val) {
- out[i] = max_val;
- }
- }
-
+
+ for (i = 0; i < ni; i++) {
+ if (in[i] < min_val) {
+ out[i] = min_val;
+ } else if (in[i] > max_val) {
+ out[i] = max_val;
+ }
+ }
+
return;
-}
-/**end repeat**/
+}
+/**end repeat**/
-/**begin repeat
-#name=CFLOAT, CDOUBLE, CLONGDOUBLE#
-#type= cfloat, cdouble, clongdouble#
-*/
+/**begin repeat
+#name=CFLOAT, CDOUBLE, CLONGDOUBLE#
+#type= cfloat, cdouble, clongdouble#
+*/
static void
-@name@_fastclip(@type@ *in, intp ni, @type@ *min, @type@ *max, @type@ *out)
-{
- register npy_intp i;
+@name@_fastclip(@type@ *in, intp ni, @type@ *min, @type@ *max, @type@ *out)
+{
+ register npy_intp i;
@type@ max_val, min_val;
-
+
min_val = *min;
max_val = *max;
- for (i = 0; i < ni; i++) {
- if (PyArray_CLT(in[i], min_val)) {
+ for (i = 0; i < ni; i++) {
+ if (PyArray_CLT(in[i], min_val)) {
out[i] = min_val;
- } else if (PyArray_CGT(in[i], max_val)) {
+ } else if (PyArray_CGT(in[i], max_val)) {
out[i] = max_val;
- }
- }
+ }
+ }
return;
-}
-
-/**end repeat**/
+}
+/**end repeat**/
+
#define OBJECT_fastclip NULL
+/************************
+ * Fast putmask functions
+ *************************/
+
+/**begin repeat
+#name=BOOL,BYTE, UBYTE, SHORT, USHORT, INT, UINT, LONG, ULONG, LONGLONG, ULONGLONG, FLOAT, DOUBLE, LONGDOUBLE,CFLOAT, CDOUBLE, CLONGDOUBLE#
+#type= Bool, byte, ubyte, short, ushort, int, uint, long, ulong, longlong, ulonglong, float, double, longdouble,cfloat, cdouble, clongdouble#
+*/
+static void
+@name@_fastputmask(@type@ *in, Bool *mask, intp ni, @type@ *vals, intp nv)
+{
+ register npy_intp i;
+ @type@ s_val;
+
+ if (nv == 1) {
+ s_val = *vals;
+ for (i = 0; i < ni; i++) {
+ if (mask[i]) {
+ in[i] = s_val;
+ }
+ }
+ }
+ else {
+ for (i = 0; i < ni; i++) {
+ if (mask[i]) {
+ in[i] = vals[i%nv];
+ }
+ }
+ }
+ return;
+}
+/**end repeat**/
+
+#define OBJECT_fastputmask NULL
+
+
#define _ALIGN(type) offsetof(struct {char c; type v;},v)
/* Disable harmless compiler warning "4116: unnamed type definition in
@@ -2164,7 +2200,8 @@
(PyArray_ScalarKindFunc*)NULL,
NULL,
NULL,
- (PyArray_FastClipFunc *)NULL
+ (PyArray_FastClipFunc *)NULL,
+ (PyArray_FastPutmaskFunc *)NULL
};
static PyArray_Descr @from@_Descr = {
@@ -2241,7 +2278,8 @@
(PyArray_ScalarKindFunc*)NULL,
NULL,
NULL,
- (PyArray_FastClipFunc*)@from@_fastclip
+ (PyArray_FastClipFunc*)@from@_fastclip,
+ (PyArray_FastPutmaskFunc*)@from@_fastputmask
};
static PyArray_Descr @from@_Descr = {
_______________________________________________
Numpy-discussion mailing list
Numpy-discussion@scipy.org
http://projects.scipy.org/mailman/listinfo/numpy-discussion