Author: Maciej Fijalkowski <fij...@gmail.com>
Branch: numpy-multidim-shards
Changeset: r49297:083d184dc8ab
Date: 2011-11-11 12:10 +0100
http://bitbucket.org/pypy/pypy/changeset/083d184dc8ab/

Log:    A branch to use shards instead of just raw indexes that have to be
        computed using expensive division

diff --git a/pypy/module/micronumpy/interp_numarray.py 
b/pypy/module/micronumpy/interp_numarray.py
--- a/pypy/module/micronumpy/interp_numarray.py
+++ b/pypy/module/micronumpy/interp_numarray.py
@@ -68,22 +68,73 @@
         dtype.setitem_w(space, arr.storage, i, w_elem)
     return arr
 
-class ArrayIndex(object):
-    """ An index into an array or view. Offset is a data offset, indexes
-    are respective indexes in dimensions
-    """
-    def __init__(self, indexes, offset):
-        self.indexes = indexes
-        self.offset = offset
+class ArrayIterator(object):
+    def __init__(self, size):
+        self.offset = 0
+        self.size   = size
+
+    def next(self):
+        self.offset += 1
+
+    def done(self):
+        return self.offset >= self.size
+
+class ViewIterator(object):
+    def __init__(self, arr):
+        self.indices = [0] * len(arr.shape)
+        self.offset  = arr.start
+        self.arr     = arr
+        self.done    = False
+
+    @jit.unroll_safe
+    def next(self):
+        for i in range(len(self.indices)):
+            if self.indices[i] < self.arr.shape[i]:
+                self.indices[i] += 1
+                self.offset += self.arr.shards[i]
+                break
+            else:
+                self.indices[i] = 0
+                self.offset -= self.arr.backshards[i]
+        else:
+            self.done = True
+
+    def done(self):
+        return self.done
+
+class Call2Iterator(object):
+    def __init__(self, left, right):
+        self.left = left
+        self.right = right
+
+    def next(self):
+        self.left.next()
+        self.right.next()
+
+    def done(self):
+        return self.left.done()
+
+class Call1Iterator(object):
+    def __init__(self, child):
+        self.child = child
+
+    def next(self):
+        self.child.next()
+
+    def done(self):
+        return self.child.done()
 
 class BaseArray(Wrappable):
-    _attrs_ = ["invalidates", "signature", "shape"]
+    _attrs_ = ["invalidates", "signature", "shape", "shards", "backshards",
+               "start"]
 
-    _immutable_fields_ = ['shape[*]']
+    _immutable_fields_ = ['shape[*]', "shards[*]", "backshards[*]"]
 
-    def __init__(self, shape):
+    def __init__(self, shards, backshards, shape):
         self.invalidates = []
         self.shape = shape
+        self.shards = shards
+        self.backshards = backshards
 
     def invalidated(self):
         if self.invalidates:
@@ -155,8 +206,9 @@
         reduce_driver = jit.JitDriver(greens=['signature'],
                          reds = ['i', 'size', 'result', 'self', 'cur_best', 
'dtype'])
         def loop(self, size):
+            xxx
             result = 0
-            cur_best = self.eval(0)
+            cur_best = self.eval(self.start)
             i = 1
             dtype = self.find_dtype()
             while i < size:
@@ -164,6 +216,7 @@
                                               self=self, dtype=dtype,
                                               size=size, i=i, result=result,
                                               cur_best=cur_best)
+                xxx
                 new_best = getattr(dtype, op_name)(cur_best, self.eval(i))
                 if dtype.ne(new_best, cur_best):
                     result = i
@@ -180,6 +233,7 @@
         return func_with_new_name(impl, "reduce_arg%s_impl" % op_name)
 
     def _all(self):
+        xxx
         size = self.find_size()
         dtype = self.find_dtype()
         i = 0
@@ -193,6 +247,7 @@
         return space.wrap(self._all())
 
     def _any(self):
+        xxx
         size = self.find_size()
         dtype = self.find_dtype()
         i = 0
@@ -233,6 +288,7 @@
         return self.get_concrete().descr_len(space)
 
     def descr_repr(self, space):
+        xxx
         # Simple implementation so that we can see the array.
         # Since what we want is to print a plethora of 2d views, 
         # use recursive calls to  to_str() to do the work.
@@ -279,10 +335,11 @@
             if idx < 0 or idx >= self.shape[0]:
                 raise OperationError(space.w_IndexError,
                                      space.wrap("index out of range"))
-            return idx
+            return self.start + idx * self.shards[0]
         index = [space.int_w(w_item)
                  for w_item in space.fixedview(w_idx)]
         item = 0
+        xxx
         for i in range(len(index)):
             v = index[i]
             if v < 0:
@@ -327,45 +384,20 @@
                 return False
         return True
 
-    def _create_slice(self, space, w_idx):
-        new_sig = signature.Signature.find_sig([
-            NDimSlice.signature, self.signature
-        ])
-        if (space.isinstance_w(w_idx, space.w_int) or
-            space.isinstance_w(w_idx, space.w_slice)):
-            start, stop, step, lgt = space.decode_index4(w_idx, self.shape[0])
-            if step == 0:
-                shape = self.shape[1:]
-            else:
-                shape = [lgt] + self.shape[1:]
-            chunks = [(start, stop, step, lgt)]
-        else:
-            chunks = []
-            shape = self.shape[:]
-            for i, w_item in enumerate(space.fixedview(w_idx)):
-                start, stop, step, lgt = space.decode_index4(w_item,
-                                                             self.shape[i])
-                chunks.append((start, stop, step, lgt))
-                if step == 0:
-                    shape[i] = -1
-                else:
-                    shape[i] = lgt
-            shape = [i for i in shape if i != -1][:]
-        return NDimSlice(self, new_sig, chunks[:], shape)
-
     def descr_getitem(self, space, w_idx):
         if self._single_item_result(space, w_idx):
-            item = self._index_of_single_item(space, w_idx)
-            return self.get_concrete().eval(item).wrap(space)
+            concrete = self.get_concrete()
+            item = concrete._index_of_single_item(space, w_idx)
+            return concrete.eval(item).wrap(space)
         return space.wrap(self._create_slice(space, w_idx))
 
     def descr_setitem(self, space, w_idx, w_value):
         self.invalidated()
+        concrete = self.get_concrete()
         if self._single_item_result(space, w_idx):
-            item = self._index_of_single_item(space, w_idx)
-            self.get_concrete().setitem_w(space, item, w_value)
+            item = concrete._index_of_single_item(space, w_idx)
+            concrete.setitem_w(space, item, w_value)
             return
-        concrete = self.get_concrete()
         if isinstance(w_value, BaseArray):
             # for now we just copy if setting part of an array from
             # part of itself. can be improved.
@@ -378,6 +410,42 @@
         view = self._create_slice(space, w_idx)
         view.setslice(space, w_value)
 
+    def _create_slice(self, space, w_idx):
+        new_sig = signature.Signature.find_sig([
+            NDimSlice.signature, self.signature
+        ])
+        if (space.isinstance_w(w_idx, space.w_int) or
+            space.isinstance_w(w_idx, space.w_slice)):
+            start, stop, step, lgt = space.decode_index4(w_idx, self.shape[0])
+            if step == 0:
+                shape = self.shape[1:]
+                shards = self.shards[1:]
+                backshards = self.backshards[1:]
+            else:
+                shape = [lgt] + self.shape[1:]
+                shards = [self.shards[0] * step] + self.shards[1:]
+                backshards = [lgt * self.shards[0] * step] + 
self.backshards[1:]
+        else:
+            shape = []
+            shards = []
+            backshards = []
+            start = -1
+            i = 0
+            for i, w_item in enumerate(space.fixedview(w_idx)):
+                start_, stop, step, lgt = space.decode_index4(w_item,
+                                                             self.shape[i])
+                if step != 0:
+                    if start == -1:
+                        start = start_ * self.shards[i] + self.start
+                    shape.append(lgt)
+                    shards.append(self.shards[i] * step)
+                    backshards.append(self.shards[i] * lgt * step)
+            # add a reminder
+            shape += self.shape[i + 1:]
+            shards += self.shards[i + 1:]
+            backshards += self.backshards[i + 1:]
+        return NDimSlice(self, new_sig, start, end, shards, backshards, shape)
+
     def descr_mean(self, space):
         return 
space.wrap(space.float_w(self.descr_sum(space))/self.find_size())
 
@@ -388,7 +456,7 @@
                     "The truth value of an array with more than one element is 
ambiguous. Use a.any() or a.all()"))
         except ValueError:
             pass
-        return 
space.wrap(space.is_true(self.get_concrete().eval(0).wrap(space)))
+        return 
space.wrap(space.is_true(self.get_concrete().eval(self.start).wrap(space)))
 
 def convert_to_array(space, w_obj):
     if isinstance(w_obj, BaseArray):
@@ -416,7 +484,7 @@
     _attrs_ = ["dtype", "value", "shape"]
 
     def __init__(self, dtype, value):
-        BaseArray.__init__(self, [])
+        BaseArray.__init__(self, None, None, [])
         self.dtype = dtype
         self.value = value
 
@@ -429,7 +497,7 @@
     def find_dtype(self):
         return self.dtype
 
-    def eval(self, i):
+    def eval(self, offset):
         return self.value
 
 class VirtualArray(BaseArray):
@@ -437,7 +505,7 @@
     Class for representing virtual arrays, such as binary ops or ufuncs
     """
     def __init__(self, signature, shape, res_dtype):
-        BaseArray.__init__(self, shape)
+        BaseArray.__init__(self, None, None, shape)
         self.forced_result = None
         self.signature = signature
         self.res_dtype = res_dtype
@@ -451,12 +519,13 @@
         signature = self.signature
         result_size = self.find_size()
         result = NDimArray(result_size, self.shape, self.find_dtype())
-        while i < result_size:
+        i = self.start_iter()
+        while not i.done():
             numpy_driver.jit_merge_point(signature=signature,
                                          result_size=result_size, i=i,
                                          self=self, result=result)
-            result.dtype.setitem(result.storage, i, self.eval(i))
-            i += 1
+            result.dtype.setitem(result.storage, i.offset, self.eval(i.offset))
+            i = self.next_index(i)
         return result
 
     def force_if_needed(self):
@@ -468,10 +537,10 @@
         self.force_if_needed()
         return self.forced_result
 
-    def eval(self, i):
+    def eval(self, offset):
         if self.forced_result is not None:
-            return self.forced_result.eval(i)
-        return self._eval(i)
+            return self.forced_result.eval(offset)
+        return self._eval(offset)
 
     def setitem(self, item, value):
         return self.get_concrete().setitem(item, value)
@@ -545,13 +614,10 @@
     Class for representing views of arrays, they will reflect changes of parent
     arrays. Example: slices
     """
-    def __init__(self, parent, signature, shape):
-        BaseArray.__init__(self, shape)
+    def __init__(self, parent, signature, shards, backshards, shape):
+        BaseArray.__init__(self, shards, backshards, shape)
         self.signature = signature
         self.parent = parent
-        self.size = 1
-        for elem in shape:
-            self.size *= elem
         self.invalidates = parent.invalidates
 
     def get_concrete(self):
@@ -561,12 +627,12 @@
         self.parent.get_concrete()
         return self
 
-    def eval(self, i):
-        return self.parent.eval(self.calc_index(i))
+    def eval(self, offset):
+        return self.parent.eval(offset)
 
     @unwrap_spec(item=int)
     def setitem_w(self, space, item, w_value):
-        return self.parent.setitem_w(space, self.calc_index(item), w_value)
+        return self.parent.setitem_w(space, item, w_value)
 
     def setitem(self, item, value):
         # This is currently not possible to be called from anywhere.
@@ -577,17 +643,18 @@
             return space.wrap(self.shape[0])
         return space.wrap(1)
 
-    def calc_index(self, item):
-        raise NotImplementedError
-
 class NDimSlice(ViewArray):
     signature = signature.BaseSignature()
 
-    _immutable_fields_ = ['shape[*]', 'chunks[*]']
+    _immutable_fields_ = ['shape[*]', 'shards[*]', 'backshards[*]', 'start']
 
-    def __init__(self, parent, signature, chunks, shape):
-        ViewArray.__init__(self, parent, signature, shape)
-        self.chunks = chunks
+    def __init__(self, parent, signature, start, end, shards, backshards,
+                 shape):
+        if isinstance(parent, NDimSlice):
+            parent = parent.parent
+        ViewArray.__init__(self, parent, signature, shards, backshards, shape)
+        self.start = start
+        self.end   = end
 
     def get_root_storage(self):
         return self.parent.get_concrete().get_root_storage()
@@ -606,6 +673,7 @@
         self._sliceloop(w_value)
 
     def _sliceloop(self, source):
+        xxx
         i = 0
         while i < self.size:
             slice_driver.jit_merge_point(signature=source.signature, i=i,
@@ -614,14 +682,12 @@
             i += 1
 
     def setitem(self, item, value):
+        xxx
         self.parent.setitem(self.calc_index(item), value)
 
     def get_root_shape(self):
         return self.parent.get_root_shape()
 
-    # XXX we might want to provide a custom finder of where we look for
-    #     a particular item, right now we'll do the calculations again
-
     @jit.unroll_safe
     def calc_index(self, item):
         index = []
@@ -656,6 +722,7 @@
         return item
 
     def to_str(self, comma, indent=' '):
+        xxx
         ret = StringBuilder()
         dtype = self.find_dtype()
         ndims = len(self.shape)
@@ -698,15 +765,24 @@
                            for j in range(self.shape[0])]))
             ret.append(']')
         else:
-            ret.append(dtype.str_format(self.eval(0)))
+            ret.append(dtype.str_format(self.eval(self.start)))
         return ret.build()
 
 class NDimArray(BaseArray):
     """ A class representing contiguous array. We know that each iteration
     by say ufunc will increase the data index by one
     """
+    start = 0
+    
     def __init__(self, size, shape, dtype):
-        BaseArray.__init__(self, shape)
+        shards = []
+        backshards = []
+        s = 1
+        for sh in shape:
+            shards.append(s)
+            backshards.append(s * (sh - 1))
+            s *= sh
+        BaseArray.__init__(self, shards, backshards, shape)
         self.size = size
         self.dtype = dtype
         self.storage = dtype.malloc(size)
@@ -724,8 +800,8 @@
     def find_dtype(self):
         return self.dtype
 
-    def eval(self, i):
-        return self.dtype.getitem(self.storage, i)
+    def eval(self, offset):
+        return self.dtype.getitem(self.storage, offset)
 
     def descr_len(self, space):
         if len(self.shape):
diff --git a/pypy/module/micronumpy/test/test_numarray.py 
b/pypy/module/micronumpy/test/test_numarray.py
--- a/pypy/module/micronumpy/test/test_numarray.py
+++ b/pypy/module/micronumpy/test/test_numarray.py
@@ -1,6 +1,55 @@
 from pypy.module.micronumpy.test.test_base import BaseNumpyAppTest
+from pypy.module.micronumpy.interp_numarray import NDimArray
+from pypy.module.micronumpy import signature
 from pypy.conftest import gettestobjspace
 
+class MockDtype(object):
+    signature = signature.BaseSignature()
+    def malloc(self, size):
+        return None
+
+class TestNumArrayDirect(object):
+    def newslice(self, *args):
+        return self.space.newslice(*[self.space.wrap(arg) for arg in args])
+    
+    def test_shards(self):
+        a = NDimArray(100, [10, 5, 3], MockDtype())
+        assert a.shards == [1, 10, 50]
+        assert a.backshards == [9, 40, 100]
+
+    def test_create_slice(self):
+        space = self.space
+        a = NDimArray(10*5*3, [10, 5, 3], MockDtype())
+        s = a._create_slice(space, space.wrap(3))
+        assert s.start == 3
+        assert s.shards == [10, 50]
+        assert s.backshards == [40, 100]
+        s = a._create_slice(space, self.newslice(1, 9, 2))
+        assert s.start == 1
+        assert s.shards == [2, 10, 50]
+        assert s.backshards == [8, 40, 100]
+        s = a._create_slice(space, space.newtuple([
+            self.newslice(1, 5, 3), self.newslice(1, 2, 1), space.wrap(1)]))
+        assert s.start == 1
+        assert s.shape == [2, 1]
+        assert s.shards == [3, 10]
+        assert s.backshards == [6, 10]
+
+    def test_slice_of_slice(self):
+        space = self.space
+        a = NDimArray(10*5*3, [10, 5, 3], MockDtype())
+        s = a._create_slice(space, space.wrap(5))
+        s2 = s._create_slice(space, space.wrap(3))
+        assert s2.shape == [3]
+        assert s2.shards == [50]
+        assert s2.parent is a
+        assert s2.backshards == [100]
+        s = a._create_slice(space, self.newslice(1, 5, 3))
+        s2 = s._create_slice(space, space.newtuple([
+            self.newslice(None, None, None), space.wrap(2)]))
+        assert s2.shape == [2, 3]
+        assert s2.shards == [3, 50]
+        assert s2.backshards == [6, 100]
 
 class AppTestNumArray(BaseNumpyAppTest):
     def test_type(self):
@@ -53,87 +102,6 @@
         assert a[0] == 1
         assert a.shape == ()
 
-    def test_repr(self):
-        from numpy import array, zeros
-        a = array(range(5), float)
-        assert repr(a) == "array([0.0, 1.0, 2.0, 3.0, 4.0])"
-        a = array([], float)
-        assert repr(a) == "array([], dtype=float64)"
-        a = zeros(1001)
-        assert repr(a) == "array([0.0, 0.0, 0.0, ..., 0.0, 0.0, 0.0])"
-        a = array(range(5), long)
-        assert repr(a) == "array([0, 1, 2, 3, 4])"
-        a = array([], long)
-        assert repr(a) == "array([], dtype=int64)"
-        a = array([True, False, True, False], "?")
-        assert repr(a) == "array([True, False, True, False], dtype=bool)"
-        a = zeros((3,4))
-        assert repr(a) == '''array([[0.0, 0.0, 0.0, 0.0],
-       [0.0, 0.0, 0.0, 0.0],
-       [0.0, 0.0, 0.0, 0.0]])'''
-        a = zeros((2,3,4))
-        assert repr(a) == '''array([[[0.0, 0.0, 0.0, 0.0],
-        [0.0, 0.0, 0.0, 0.0],
-        [0.0, 0.0, 0.0, 0.0]],
-
-       [[0.0, 0.0, 0.0, 0.0],
-        [0.0, 0.0, 0.0, 0.0],
-        [0.0, 0.0, 0.0, 0.0]]])'''
-
-    def test_repr_slice(self):
-        from numpy import array, zeros
-        a = array(range(5), float)
-        b = a[1::2]
-        assert repr(b) == "array([1.0, 3.0])"
-        a = zeros(2002)
-        b = a[::2]
-        assert repr(b) == "array([0.0, 0.0, 0.0, ..., 0.0, 0.0, 0.0])"
-        a = array((range(5),range(5,10)), dtype="int16")
-        b=a[1,2:]
-        assert repr(b) == "array([7, 8, 9], dtype=int16)"
-        #This is the way cpython numpy does it - an empty slice prints its 
shape
-        b=a[2:1,]
-        assert repr(b) == "array([], shape=(0, 5), dtype=int16)"
-
-    def test_str(self):
-        from numpy import array, zeros
-        a = array(range(5), float)
-        assert str(a) == "[0.0 1.0 2.0 3.0 4.0]"
-        assert str((2*a)[:]) == "[0.0 2.0 4.0 6.0 8.0]"
-        a = zeros(1001)
-        assert str(a) == "[0.0 0.0 0.0 ..., 0.0 0.0 0.0]"
-
-        a = array(range(5), dtype=long)
-        assert str(a) == "[0 1 2 3 4]"
-        a = array([True, False, True, False], dtype="?")
-        assert str(a) == "[True False True False]"
-
-        a = array(range(5), dtype="int8")
-        assert str(a) == "[0 1 2 3 4]"
-
-        a = array(range(5), dtype="int16")
-        assert str(a) == "[0 1 2 3 4]"
-
-        a = array((range(5),range(5,10)), dtype="int16")
-        assert str(a) == "[[0 1 2 3 4],\n [5 6 7 8 9]]"
-
-        a = array(3,dtype=int)
-        assert str(a) == "3"
-
-    def test_str_slice(self):
-        from numpy import array, zeros
-        a = array(range(5), float)
-        b = a[1::2]
-        assert str(b) == "[1.0 3.0]"
-        a = zeros(2002)
-        b = a[::2]
-        assert str(b) == "[0.0 0.0 0.0 ..., 0.0 0.0 0.0]"
-        a = array((range(5),range(5,10)), dtype="int16")
-        b=a[1,2:]
-        assert str(b) == "[7 8 9]"
-        b=a[2:1,]
-        assert str(b) == "[]"
-
     def test_getitem(self):
         from numpy import array
         a = array(range(5))
@@ -762,3 +730,85 @@
         for i in range(4):
             assert a[i] == i + 1
         raises(ValueError, fromstring, "abc")
+
+class AppTestRepr(BaseNumpyAppTest):
+    def test_repr(self):
+        from numpy import array, zeros
+        a = array(range(5), float)
+        assert repr(a) == "array([0.0, 1.0, 2.0, 3.0, 4.0])"
+        a = array([], float)
+        assert repr(a) == "array([], dtype=float64)"
+        a = zeros(1001)
+        assert repr(a) == "array([0.0, 0.0, 0.0, ..., 0.0, 0.0, 0.0])"
+        a = array(range(5), long)
+        assert repr(a) == "array([0, 1, 2, 3, 4])"
+        a = array([], long)
+        assert repr(a) == "array([], dtype=int64)"
+        a = array([True, False, True, False], "?")
+        assert repr(a) == "array([True, False, True, False], dtype=bool)"
+        a = zeros((3,4))
+        assert repr(a) == '''array([[0.0, 0.0, 0.0, 0.0],
+       [0.0, 0.0, 0.0, 0.0],
+       [0.0, 0.0, 0.0, 0.0]])'''
+        a = zeros((2,3,4))
+        assert repr(a) == '''array([[[0.0, 0.0, 0.0, 0.0],
+        [0.0, 0.0, 0.0, 0.0],
+        [0.0, 0.0, 0.0, 0.0]],
+
+       [[0.0, 0.0, 0.0, 0.0],
+        [0.0, 0.0, 0.0, 0.0],
+        [0.0, 0.0, 0.0, 0.0]]])'''
+
+    def test_repr_slice(self):
+        from numpy import array, zeros
+        a = array(range(5), float)
+        b = a[1::2]
+        assert repr(b) == "array([1.0, 3.0])"
+        a = zeros(2002)
+        b = a[::2]
+        assert repr(b) == "array([0.0, 0.0, 0.0, ..., 0.0, 0.0, 0.0])"
+        a = array((range(5),range(5,10)), dtype="int16")
+        b=a[1,2:]
+        assert repr(b) == "array([7, 8, 9], dtype=int16)"
+        #This is the way cpython numpy does it - an empty slice prints its 
shape
+        b=a[2:1,]
+        assert repr(b) == "array([], shape=(0, 5), dtype=int16)"
+
+    def test_str(self):
+        from numpy import array, zeros
+        a = array(range(5), float)
+        assert str(a) == "[0.0 1.0 2.0 3.0 4.0]"
+        assert str((2*a)[:]) == "[0.0 2.0 4.0 6.0 8.0]"
+        a = zeros(1001)
+        assert str(a) == "[0.0 0.0 0.0 ..., 0.0 0.0 0.0]"
+
+        a = array(range(5), dtype=long)
+        assert str(a) == "[0 1 2 3 4]"
+        a = array([True, False, True, False], dtype="?")
+        assert str(a) == "[True False True False]"
+
+        a = array(range(5), dtype="int8")
+        assert str(a) == "[0 1 2 3 4]"
+
+        a = array(range(5), dtype="int16")
+        assert str(a) == "[0 1 2 3 4]"
+
+        a = array((range(5),range(5,10)), dtype="int16")
+        assert str(a) == "[[0 1 2 3 4],\n [5 6 7 8 9]]"
+
+        a = array(3,dtype=int)
+        assert str(a) == "3"
+
+    def test_str_slice(self):
+        from numpy import array, zeros
+        a = array(range(5), float)
+        b = a[1::2]
+        assert str(b) == "[1.0 3.0]"
+        a = zeros(2002)
+        b = a[::2]
+        assert str(b) == "[0.0 0.0 0.0 ..., 0.0 0.0 0.0]"
+        a = array((range(5),range(5,10)), dtype="int16")
+        b=a[1,2:]
+        assert str(b) == "[7 8 9]"
+        b=a[2:1,]
+        assert str(b) == "[]"
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
http://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to