Author: Maciej Fijalkowski <fij...@gmail.com> Branch: missing-ndarray-attributes Changeset: r57679:0017e12c21c0 Date: 2012-09-30 00:09 +0200 http://bitbucket.org/pypy/pypy/changeset/0017e12c21c0/
Log: Basic sorting - not RPython yet. argsort for now diff --git a/pypy/module/micronumpy/arrayimpl/concrete.py b/pypy/module/micronumpy/arrayimpl/concrete.py --- a/pypy/module/micronumpy/arrayimpl/concrete.py +++ b/pypy/module/micronumpy/arrayimpl/concrete.py @@ -10,6 +10,7 @@ from pypy.rpython.lltypesystem import rffi, lltype from pypy.rlib import jit from pypy.rlib.rawstorage import free_raw_storage +from pypy.module.micronumpy.arrayimpl.sort import sort_array class ConcreteArrayIterator(base.BaseArrayIterator): def __init__(self, array): @@ -372,6 +373,9 @@ def get_storage_as_int(self, space): return rffi.cast(lltype.Signed, self.storage) + def get_storage(self): + return self.storage + class ConcreteArray(BaseConcreteArray): def __init__(self, shape, dtype, order, strides, backstrides): self.shape = shape @@ -400,6 +404,9 @@ self.order) return SliceArray(0, strides, backstrides, new_shape, self) + def argsort(self, space): + return sort_array(self, space) + class SliceArray(BaseConcreteArray): def __init__(self, start, strides, backstrides, shape, parent, dtype=None): self.strides = strides diff --git a/pypy/module/micronumpy/arrayimpl/scalar.py b/pypy/module/micronumpy/arrayimpl/scalar.py --- a/pypy/module/micronumpy/arrayimpl/scalar.py +++ b/pypy/module/micronumpy/arrayimpl/scalar.py @@ -95,3 +95,6 @@ def get_storage_as_int(self, space): raise OperationError(space.w_ValueError, space.wrap("scalars have no address")) + + def argsort(self, space): + return space.wrap(0) diff --git a/pypy/module/micronumpy/arrayimpl/sort.py b/pypy/module/micronumpy/arrayimpl/sort.py new file mode 100644 --- /dev/null +++ b/pypy/module/micronumpy/arrayimpl/sort.py @@ -0,0 +1,60 @@ + +from pypy.rpython.lltypesystem import rffi +from pypy.rlib.listsort import make_timsort_class +from pypy.rlib.objectmodel import specialize +from pypy.rlib.rawstorage import raw_storage_getitem, raw_storage_setitem +from pypy.module.micronumpy.base import W_NDimArray +from pypy.module.micronumpy import interp_dtype + +@specialize.memo() +def make_sort_classes(space, TP): + class ArgArrayRepresentation(object): + def __init__(self, itemsize, size, values, indexes): + self.itemsize = itemsize + self.size = size + self.values = values + self.indexes = indexes + + def getitem(self, item): + idx = item * self.itemsize + return (raw_storage_getitem(TP, self.values, idx), + raw_storage_getitem(TP, self.indexes, idx)) + + def setitem(self, idx, item): + idx *= self.itemsize + raw_storage_setitem(self.values, idx, rffi.cast(TP, item[0])) + raw_storage_setitem(self.indexes, idx, item[1]) + + def arg_getitem(lst, item): + return lst.getitem(item) + + def arg_setitem(lst, item, value): + lst.setitem(item, value) + + def arg_length(lst): + return lst.size + + def arg_getitem_slice(lst, start, stop): + xxx + + def arg_lt(a, b): + return a[0] < b[0] + + ArgSort = make_timsort_class(arg_getitem, arg_setitem, arg_length, + arg_getitem_slice, arg_lt) + + return ArgArrayRepresentation, ArgSort + +def sort_array(arr, space): + itemsize = arr.dtype.itemtype.get_element_size() + # create array of indexes + dtype = interp_dtype.get_dtype_cache(space).w_longdtype + indexes = W_NDimArray.from_shape([arr.get_size()], dtype) + storage = indexes.implementation.get_storage() + for i in range(arr.get_size()): + raw_storage_setitem(storage, i * itemsize, i) + Repr, Sort = make_sort_classes(space, arr.dtype.itemtype.T) + r = Repr(itemsize, arr.get_size(), arr.get_storage(), + indexes.implementation.get_storage()) + Sort(r).sort() + return indexes diff --git a/pypy/module/micronumpy/interp_numarray.py b/pypy/module/micronumpy/interp_numarray.py --- a/pypy/module/micronumpy/interp_numarray.py +++ b/pypy/module/micronumpy/interp_numarray.py @@ -362,9 +362,11 @@ space.w_False])) return w_d - def descr_argsort(self, space, w_axis=-1, w_kind='quicksort', w_order=None): - raise OperationError(space.w_NotImplementedError, space.wrap( - "argsort not implemented yet")) + def descr_argsort(self, space, w_axis=None, w_kind=None, w_order=None): + # happily ignore the kind + # create a contiguous copy of the array + contig = self.descr_copy(space) + return contig.implementation.argsort(space) def descr_astype(self, space, w_type): raise OperationError(space.w_NotImplementedError, space.wrap( @@ -765,6 +767,8 @@ flat = GetSetProperty(W_NDimArray.descr_get_flatiter), item = interp2app(W_NDimArray.descr_item), + argsort = interp2app(W_NDimArray.descr_argsort), + __array_interface__ = GetSetProperty(W_NDimArray.descr_array_iface), ) diff --git a/pypy/module/micronumpy/test/test_numarray.py b/pypy/module/micronumpy/test/test_numarray.py --- a/pypy/module/micronumpy/test/test_numarray.py +++ b/pypy/module/micronumpy/test/test_numarray.py @@ -1549,6 +1549,16 @@ assert (b == [20, 1, 21, 3, 4]).all() raises(ValueError, "array([1, 2])[array([True, False, True])] = [1, 2, 3]") + def test_argsort(self): + from _numpypy import array, arange + a = array([6, 4, 1, 3, 8, 3]) + assert array(2.0).argsort() == 0 + res = a.argsort() + assert (res == [2, 3, 5, 1, 0, 4]).all() + assert (a == [6, 4, 1, 3, 8, 3]).all() # not modified + a = arange(100) + assert (a.argsort() == a).all() + class AppTestMultiDim(BaseNumpyAppTest): def test_init(self): import _numpypy diff --git a/pypy/rlib/listsort.py b/pypy/rlib/listsort.py --- a/pypy/rlib/listsort.py +++ b/pypy/rlib/listsort.py @@ -7,20 +7,28 @@ ## ------------------------------------------------------------------------ ## Adapted from CPython, original code and algorithms by Tim Peters -def list_getitem(list, item): - return list[item] +def make_timsort_class(getitem=None, setitem=None, length=None, + getitem_slice=None, lt=None): -def list_setitem(list, item, value): - list[item] = value + if getitem is None: + def getitem(list, item): + return list[item] -def list_length(list): - return len(list) + if setitem is None: + def setitem(list, item, value): + list[item] = value -def list_getitem_slice(list, start, stop): - return list[start:stop] + if length is None: + def length(list): + return len(list) -def make_timsort_class(getitem=list_getitem, setitem=list_setitem, - length=list_length, getitem_slice=list_getitem_slice): + if getitem_slice is None: + def getitem_slice(list, start, stop): + return list[start:stop] + + if lt is None: + def lt(a, b): + return a < b class TimSort(object): """TimSort(list).sort() @@ -38,7 +46,7 @@ setitem(self.list, item, val) def lt(self, a, b): - return a < b + return lt(a, b) def le(self, a, b): return not self.lt(b, a) # always use self.lt() as the primitive @@ -57,14 +65,14 @@ # set l to where list[start] belongs l = a.base r = start - pivot = a.list[r] + pivot = a.getitem(r) # Invariants: # pivot >= all in [base, l). # pivot < all in [r, start). # The second is vacuously true at the start. while l < r: p = l + ((r - l) >> 1) - if self.lt(pivot, a.list[p]): + if self.lt(pivot, a.getitem(p)): r = p else: l = p+1 @@ -75,8 +83,8 @@ # first slot after them -- that's why this sort is stable. # Slide over to make room. for p in xrange(start, l, -1): - a.list[p] = a.list[p-1] - a.list[l] = pivot + a.setitem(p, a.getitem(p-1)) + a.setitem(l, pivot) # Compute the length of the run in the slice "a". # "A run" is the longest ascending sequence, with @@ -100,17 +108,17 @@ descending = False else: n = 2 - if self.lt(a.list[a.base + 1], a.list[a.base]): + if self.lt(a.getitem(a.base + 1), a.getitem(a.base)): descending = True for p in xrange(a.base + 2, a.base + a.len): - if self.lt(a.list[p], a.list[p-1]): + if self.lt(a.getitem(p), a.getitem(p-1)): n += 1 else: break else: descending = False for p in xrange(a.base + 2, a.base + a.len): - if self.lt(a.list[p], a.list[p-1]): + if self.lt(a.getitem(p), a.getitem(p-1)): break else: n += 1 @@ -143,13 +151,13 @@ p = a.base + hint lastofs = 0 ofs = 1 - if lower(a.list[p], key): + if lower(a.getitem(p), key): # a[hint] < key -- gallop right, until # a[hint + lastofs] < key <= a[hint + ofs] maxofs = a.len - hint # a[a.len-1] is highest while ofs < maxofs: - if lower(a.list[p + ofs], key): + if lower(a.getitem(p + ofs), key): lastofs = ofs try: ofs = ovfcheck(ofs << 1) @@ -171,7 +179,7 @@ # a[hint - ofs] < key <= a[hint - lastofs] maxofs = hint + 1 # a[0] is lowest while ofs < maxofs: - if lower(a.list[p - ofs], key): + if lower(a.getitem(p - ofs), key): break else: # key <= a[hint - ofs] @@ -196,7 +204,7 @@ lastofs += 1 while lastofs < ofs: m = lastofs + ((ofs - lastofs) >> 1) - if lower(a.list[a.base + m], key): + if lower(a.getitem(a.base + m), key): lastofs = m+1 # a[m] < key else: ofs = m # key <= a[m] @@ -263,7 +271,7 @@ # Do the straightforward thing until (if ever) one run # appears to win consistently. while True: - if self.lt(b.list[b.base], a.list[a.base]): + if self.lt(b.getitem(b.base), a.getitem(a.base)): self.setitem(dest, b.popleft()) dest += 1 if b.len == 0: @@ -292,7 +300,7 @@ min_gallop -= min_gallop > 1 self.min_gallop = min_gallop - acount = self.gallop(b.list[b.base], a, hint=0, + acount = self.gallop(b.getitem(b.base), a, hint=0, rightmost=True) for p in xrange(a.base, a.base + acount): self.setitem(dest, a.getitem(p)) @@ -309,7 +317,7 @@ if b.len == 0: return - bcount = self.gallop(a.list[a.base], b, hint=0, + bcount = self.gallop(a.getitem(a.base), b, hint=0, rightmost=False) for p in xrange(b.base, b.base + bcount): self.setitem(dest, b.getitem(p)) @@ -366,8 +374,8 @@ # Do the straightforward thing until (if ever) one run # appears to win consistently. while True: - nexta = a.list[a.base + a.len - 1] - nextb = b.list[b.base + b.len - 1] + nexta = a.getitem(a.base + a.len - 1) + nextb = b.getitem(b.base + b.len - 1) if self.lt(nextb, nexta): dest -= 1 self.setitem(dest, nexta) @@ -399,7 +407,7 @@ min_gallop -= min_gallop > 1 self.min_gallop = min_gallop - nextb = b.list[b.base + b.len - 1] + nextb = b.getitem(b.base + b.len - 1) k = self.gallop(nextb, a, hint=a.len-1, rightmost=True) acount = a.len - k for p in xrange(a.base + a.len - 1, a.base + k - 1, -1): @@ -414,7 +422,7 @@ if b.len == 1: return - nexta = a.list[a.base + a.len - 1] + nexta = a.getitem(a.base + a.len - 1) k = self.gallop(nexta, b, hint=b.len-1, rightmost=False) bcount = b.len - k for p in xrange(b.base + b.len - 1, b.base + k - 1, -1): @@ -463,14 +471,14 @@ # Where does b start in a? Elements in a before that can be # ignored (already in place). - k = self.gallop(b.list[b.base], a, hint=0, rightmost=True) + k = self.gallop(b.getitem(b.base), a, hint=0, rightmost=True) a.advance(k) if a.len == 0: return # Where does a end in b? Elements in b after that can be # ignored (already in place). - b.len = self.gallop(a.list[a.base+a.len-1], b, hint=b.len-1, + b.len = self.gallop(a.getitem(a.base+a.len-1), b, hint=b.len-1, rightmost=False) if b.len == 0: return @@ -589,6 +597,9 @@ def getitem(self, item): return getitem(self.list, item) + def setitem(self, item, value): + setitem(self.list, item, value) + def popleft(self): result = getitem(self.list, self.base) self.base += 1 _______________________________________________ pypy-commit mailing list pypy-commit@python.org http://mail.python.org/mailman/listinfo/pypy-commit