Author: Maciej Fijalkowski <[email protected]>
Branch: numpy-multidim-shards
Changeset: r49297:083d184dc8ab
Date: 2011-11-11 12:10 +0100
http://bitbucket.org/pypy/pypy/changeset/083d184dc8ab/
Log: A branch to use shards instead of just raw indexes that have to be
computed using expensive division
diff --git a/pypy/module/micronumpy/interp_numarray.py
b/pypy/module/micronumpy/interp_numarray.py
--- a/pypy/module/micronumpy/interp_numarray.py
+++ b/pypy/module/micronumpy/interp_numarray.py
@@ -68,22 +68,73 @@
dtype.setitem_w(space, arr.storage, i, w_elem)
return arr
-class ArrayIndex(object):
- """ An index into an array or view. Offset is a data offset, indexes
- are respective indexes in dimensions
- """
- def __init__(self, indexes, offset):
- self.indexes = indexes
- self.offset = offset
+class ArrayIterator(object):
+ def __init__(self, size):
+ self.offset = 0
+ self.size = size
+
+ def next(self):
+ self.offset += 1
+
+ def done(self):
+ return self.offset >= self.size
+
+class ViewIterator(object):
+ def __init__(self, arr):
+ self.indices = [0] * len(arr.shape)
+ self.offset = arr.start
+ self.arr = arr
+ self.done = False
+
+ @jit.unroll_safe
+ def next(self):
+ for i in range(len(self.indices)):
+ if self.indices[i] < self.arr.shape[i]:
+ self.indices[i] += 1
+ self.offset += self.arr.shards[i]
+ break
+ else:
+ self.indices[i] = 0
+ self.offset -= self.arr.backshards[i]
+ else:
+ self.done = True
+
+ def done(self):
+ return self.done
+
+class Call2Iterator(object):
+ def __init__(self, left, right):
+ self.left = left
+ self.right = right
+
+ def next(self):
+ self.left.next()
+ self.right.next()
+
+ def done(self):
+ return self.left.done()
+
+class Call1Iterator(object):
+ def __init__(self, child):
+ self.child = child
+
+ def next(self):
+ self.child.next()
+
+ def done(self):
+ return self.child.done()
class BaseArray(Wrappable):
- _attrs_ = ["invalidates", "signature", "shape"]
+ _attrs_ = ["invalidates", "signature", "shape", "shards", "backshards",
+ "start"]
- _immutable_fields_ = ['shape[*]']
+ _immutable_fields_ = ['shape[*]', "shards[*]", "backshards[*]"]
- def __init__(self, shape):
+ def __init__(self, shards, backshards, shape):
self.invalidates = []
self.shape = shape
+ self.shards = shards
+ self.backshards = backshards
def invalidated(self):
if self.invalidates:
@@ -155,8 +206,9 @@
reduce_driver = jit.JitDriver(greens=['signature'],
reds = ['i', 'size', 'result', 'self', 'cur_best',
'dtype'])
def loop(self, size):
+ xxx
result = 0
- cur_best = self.eval(0)
+ cur_best = self.eval(self.start)
i = 1
dtype = self.find_dtype()
while i < size:
@@ -164,6 +216,7 @@
self=self, dtype=dtype,
size=size, i=i, result=result,
cur_best=cur_best)
+ xxx
new_best = getattr(dtype, op_name)(cur_best, self.eval(i))
if dtype.ne(new_best, cur_best):
result = i
@@ -180,6 +233,7 @@
return func_with_new_name(impl, "reduce_arg%s_impl" % op_name)
def _all(self):
+ xxx
size = self.find_size()
dtype = self.find_dtype()
i = 0
@@ -193,6 +247,7 @@
return space.wrap(self._all())
def _any(self):
+ xxx
size = self.find_size()
dtype = self.find_dtype()
i = 0
@@ -233,6 +288,7 @@
return self.get_concrete().descr_len(space)
def descr_repr(self, space):
+ xxx
# Simple implementation so that we can see the array.
# Since what we want is to print a plethora of 2d views,
# use recursive calls to to_str() to do the work.
@@ -279,10 +335,11 @@
if idx < 0 or idx >= self.shape[0]:
raise OperationError(space.w_IndexError,
space.wrap("index out of range"))
- return idx
+ return self.start + idx * self.shards[0]
index = [space.int_w(w_item)
for w_item in space.fixedview(w_idx)]
item = 0
+ xxx
for i in range(len(index)):
v = index[i]
if v < 0:
@@ -327,45 +384,20 @@
return False
return True
- def _create_slice(self, space, w_idx):
- new_sig = signature.Signature.find_sig([
- NDimSlice.signature, self.signature
- ])
- if (space.isinstance_w(w_idx, space.w_int) or
- space.isinstance_w(w_idx, space.w_slice)):
- start, stop, step, lgt = space.decode_index4(w_idx, self.shape[0])
- if step == 0:
- shape = self.shape[1:]
- else:
- shape = [lgt] + self.shape[1:]
- chunks = [(start, stop, step, lgt)]
- else:
- chunks = []
- shape = self.shape[:]
- for i, w_item in enumerate(space.fixedview(w_idx)):
- start, stop, step, lgt = space.decode_index4(w_item,
- self.shape[i])
- chunks.append((start, stop, step, lgt))
- if step == 0:
- shape[i] = -1
- else:
- shape[i] = lgt
- shape = [i for i in shape if i != -1][:]
- return NDimSlice(self, new_sig, chunks[:], shape)
-
def descr_getitem(self, space, w_idx):
if self._single_item_result(space, w_idx):
- item = self._index_of_single_item(space, w_idx)
- return self.get_concrete().eval(item).wrap(space)
+ concrete = self.get_concrete()
+ item = concrete._index_of_single_item(space, w_idx)
+ return concrete.eval(item).wrap(space)
return space.wrap(self._create_slice(space, w_idx))
def descr_setitem(self, space, w_idx, w_value):
self.invalidated()
+ concrete = self.get_concrete()
if self._single_item_result(space, w_idx):
- item = self._index_of_single_item(space, w_idx)
- self.get_concrete().setitem_w(space, item, w_value)
+ item = concrete._index_of_single_item(space, w_idx)
+ concrete.setitem_w(space, item, w_value)
return
- concrete = self.get_concrete()
if isinstance(w_value, BaseArray):
# for now we just copy if setting part of an array from
# part of itself. can be improved.
@@ -378,6 +410,42 @@
view = self._create_slice(space, w_idx)
view.setslice(space, w_value)
+ def _create_slice(self, space, w_idx):
+ new_sig = signature.Signature.find_sig([
+ NDimSlice.signature, self.signature
+ ])
+ if (space.isinstance_w(w_idx, space.w_int) or
+ space.isinstance_w(w_idx, space.w_slice)):
+ start, stop, step, lgt = space.decode_index4(w_idx, self.shape[0])
+ if step == 0:
+ shape = self.shape[1:]
+ shards = self.shards[1:]
+ backshards = self.backshards[1:]
+ else:
+ shape = [lgt] + self.shape[1:]
+ shards = [self.shards[0] * step] + self.shards[1:]
+ backshards = [lgt * self.shards[0] * step] +
self.backshards[1:]
+ else:
+ shape = []
+ shards = []
+ backshards = []
+ start = -1
+ i = 0
+ for i, w_item in enumerate(space.fixedview(w_idx)):
+ start_, stop, step, lgt = space.decode_index4(w_item,
+ self.shape[i])
+ if step != 0:
+ if start == -1:
+ start = start_ * self.shards[i] + self.start
+ shape.append(lgt)
+ shards.append(self.shards[i] * step)
+ backshards.append(self.shards[i] * lgt * step)
+ # add a reminder
+ shape += self.shape[i + 1:]
+ shards += self.shards[i + 1:]
+ backshards += self.backshards[i + 1:]
+ return NDimSlice(self, new_sig, start, end, shards, backshards, shape)
+
def descr_mean(self, space):
return
space.wrap(space.float_w(self.descr_sum(space))/self.find_size())
@@ -388,7 +456,7 @@
"The truth value of an array with more than one element is
ambiguous. Use a.any() or a.all()"))
except ValueError:
pass
- return
space.wrap(space.is_true(self.get_concrete().eval(0).wrap(space)))
+ return
space.wrap(space.is_true(self.get_concrete().eval(self.start).wrap(space)))
def convert_to_array(space, w_obj):
if isinstance(w_obj, BaseArray):
@@ -416,7 +484,7 @@
_attrs_ = ["dtype", "value", "shape"]
def __init__(self, dtype, value):
- BaseArray.__init__(self, [])
+ BaseArray.__init__(self, None, None, [])
self.dtype = dtype
self.value = value
@@ -429,7 +497,7 @@
def find_dtype(self):
return self.dtype
- def eval(self, i):
+ def eval(self, offset):
return self.value
class VirtualArray(BaseArray):
@@ -437,7 +505,7 @@
Class for representing virtual arrays, such as binary ops or ufuncs
"""
def __init__(self, signature, shape, res_dtype):
- BaseArray.__init__(self, shape)
+ BaseArray.__init__(self, None, None, shape)
self.forced_result = None
self.signature = signature
self.res_dtype = res_dtype
@@ -451,12 +519,13 @@
signature = self.signature
result_size = self.find_size()
result = NDimArray(result_size, self.shape, self.find_dtype())
- while i < result_size:
+ i = self.start_iter()
+ while not i.done():
numpy_driver.jit_merge_point(signature=signature,
result_size=result_size, i=i,
self=self, result=result)
- result.dtype.setitem(result.storage, i, self.eval(i))
- i += 1
+ result.dtype.setitem(result.storage, i.offset, self.eval(i.offset))
+ i = self.next_index(i)
return result
def force_if_needed(self):
@@ -468,10 +537,10 @@
self.force_if_needed()
return self.forced_result
- def eval(self, i):
+ def eval(self, offset):
if self.forced_result is not None:
- return self.forced_result.eval(i)
- return self._eval(i)
+ return self.forced_result.eval(offset)
+ return self._eval(offset)
def setitem(self, item, value):
return self.get_concrete().setitem(item, value)
@@ -545,13 +614,10 @@
Class for representing views of arrays, they will reflect changes of parent
arrays. Example: slices
"""
- def __init__(self, parent, signature, shape):
- BaseArray.__init__(self, shape)
+ def __init__(self, parent, signature, shards, backshards, shape):
+ BaseArray.__init__(self, shards, backshards, shape)
self.signature = signature
self.parent = parent
- self.size = 1
- for elem in shape:
- self.size *= elem
self.invalidates = parent.invalidates
def get_concrete(self):
@@ -561,12 +627,12 @@
self.parent.get_concrete()
return self
- def eval(self, i):
- return self.parent.eval(self.calc_index(i))
+ def eval(self, offset):
+ return self.parent.eval(offset)
@unwrap_spec(item=int)
def setitem_w(self, space, item, w_value):
- return self.parent.setitem_w(space, self.calc_index(item), w_value)
+ return self.parent.setitem_w(space, item, w_value)
def setitem(self, item, value):
# This is currently not possible to be called from anywhere.
@@ -577,17 +643,18 @@
return space.wrap(self.shape[0])
return space.wrap(1)
- def calc_index(self, item):
- raise NotImplementedError
-
class NDimSlice(ViewArray):
signature = signature.BaseSignature()
- _immutable_fields_ = ['shape[*]', 'chunks[*]']
+ _immutable_fields_ = ['shape[*]', 'shards[*]', 'backshards[*]', 'start']
- def __init__(self, parent, signature, chunks, shape):
- ViewArray.__init__(self, parent, signature, shape)
- self.chunks = chunks
+ def __init__(self, parent, signature, start, end, shards, backshards,
+ shape):
+ if isinstance(parent, NDimSlice):
+ parent = parent.parent
+ ViewArray.__init__(self, parent, signature, shards, backshards, shape)
+ self.start = start
+ self.end = end
def get_root_storage(self):
return self.parent.get_concrete().get_root_storage()
@@ -606,6 +673,7 @@
self._sliceloop(w_value)
def _sliceloop(self, source):
+ xxx
i = 0
while i < self.size:
slice_driver.jit_merge_point(signature=source.signature, i=i,
@@ -614,14 +682,12 @@
i += 1
def setitem(self, item, value):
+ xxx
self.parent.setitem(self.calc_index(item), value)
def get_root_shape(self):
return self.parent.get_root_shape()
- # XXX we might want to provide a custom finder of where we look for
- # a particular item, right now we'll do the calculations again
-
@jit.unroll_safe
def calc_index(self, item):
index = []
@@ -656,6 +722,7 @@
return item
def to_str(self, comma, indent=' '):
+ xxx
ret = StringBuilder()
dtype = self.find_dtype()
ndims = len(self.shape)
@@ -698,15 +765,24 @@
for j in range(self.shape[0])]))
ret.append(']')
else:
- ret.append(dtype.str_format(self.eval(0)))
+ ret.append(dtype.str_format(self.eval(self.start)))
return ret.build()
class NDimArray(BaseArray):
""" A class representing contiguous array. We know that each iteration
by say ufunc will increase the data index by one
"""
+ start = 0
+
def __init__(self, size, shape, dtype):
- BaseArray.__init__(self, shape)
+ shards = []
+ backshards = []
+ s = 1
+ for sh in shape:
+ shards.append(s)
+ backshards.append(s * (sh - 1))
+ s *= sh
+ BaseArray.__init__(self, shards, backshards, shape)
self.size = size
self.dtype = dtype
self.storage = dtype.malloc(size)
@@ -724,8 +800,8 @@
def find_dtype(self):
return self.dtype
- def eval(self, i):
- return self.dtype.getitem(self.storage, i)
+ def eval(self, offset):
+ return self.dtype.getitem(self.storage, offset)
def descr_len(self, space):
if len(self.shape):
diff --git a/pypy/module/micronumpy/test/test_numarray.py
b/pypy/module/micronumpy/test/test_numarray.py
--- a/pypy/module/micronumpy/test/test_numarray.py
+++ b/pypy/module/micronumpy/test/test_numarray.py
@@ -1,6 +1,55 @@
from pypy.module.micronumpy.test.test_base import BaseNumpyAppTest
+from pypy.module.micronumpy.interp_numarray import NDimArray
+from pypy.module.micronumpy import signature
from pypy.conftest import gettestobjspace
+class MockDtype(object):
+ signature = signature.BaseSignature()
+ def malloc(self, size):
+ return None
+
+class TestNumArrayDirect(object):
+ def newslice(self, *args):
+ return self.space.newslice(*[self.space.wrap(arg) for arg in args])
+
+ def test_shards(self):
+ a = NDimArray(100, [10, 5, 3], MockDtype())
+ assert a.shards == [1, 10, 50]
+ assert a.backshards == [9, 40, 100]
+
+ def test_create_slice(self):
+ space = self.space
+ a = NDimArray(10*5*3, [10, 5, 3], MockDtype())
+ s = a._create_slice(space, space.wrap(3))
+ assert s.start == 3
+ assert s.shards == [10, 50]
+ assert s.backshards == [40, 100]
+ s = a._create_slice(space, self.newslice(1, 9, 2))
+ assert s.start == 1
+ assert s.shards == [2, 10, 50]
+ assert s.backshards == [8, 40, 100]
+ s = a._create_slice(space, space.newtuple([
+ self.newslice(1, 5, 3), self.newslice(1, 2, 1), space.wrap(1)]))
+ assert s.start == 1
+ assert s.shape == [2, 1]
+ assert s.shards == [3, 10]
+ assert s.backshards == [6, 10]
+
+ def test_slice_of_slice(self):
+ space = self.space
+ a = NDimArray(10*5*3, [10, 5, 3], MockDtype())
+ s = a._create_slice(space, space.wrap(5))
+ s2 = s._create_slice(space, space.wrap(3))
+ assert s2.shape == [3]
+ assert s2.shards == [50]
+ assert s2.parent is a
+ assert s2.backshards == [100]
+ s = a._create_slice(space, self.newslice(1, 5, 3))
+ s2 = s._create_slice(space, space.newtuple([
+ self.newslice(None, None, None), space.wrap(2)]))
+ assert s2.shape == [2, 3]
+ assert s2.shards == [3, 50]
+ assert s2.backshards == [6, 100]
class AppTestNumArray(BaseNumpyAppTest):
def test_type(self):
@@ -53,87 +102,6 @@
assert a[0] == 1
assert a.shape == ()
- def test_repr(self):
- from numpy import array, zeros
- a = array(range(5), float)
- assert repr(a) == "array([0.0, 1.0, 2.0, 3.0, 4.0])"
- a = array([], float)
- assert repr(a) == "array([], dtype=float64)"
- a = zeros(1001)
- assert repr(a) == "array([0.0, 0.0, 0.0, ..., 0.0, 0.0, 0.0])"
- a = array(range(5), long)
- assert repr(a) == "array([0, 1, 2, 3, 4])"
- a = array([], long)
- assert repr(a) == "array([], dtype=int64)"
- a = array([True, False, True, False], "?")
- assert repr(a) == "array([True, False, True, False], dtype=bool)"
- a = zeros((3,4))
- assert repr(a) == '''array([[0.0, 0.0, 0.0, 0.0],
- [0.0, 0.0, 0.0, 0.0],
- [0.0, 0.0, 0.0, 0.0]])'''
- a = zeros((2,3,4))
- assert repr(a) == '''array([[[0.0, 0.0, 0.0, 0.0],
- [0.0, 0.0, 0.0, 0.0],
- [0.0, 0.0, 0.0, 0.0]],
-
- [[0.0, 0.0, 0.0, 0.0],
- [0.0, 0.0, 0.0, 0.0],
- [0.0, 0.0, 0.0, 0.0]]])'''
-
- def test_repr_slice(self):
- from numpy import array, zeros
- a = array(range(5), float)
- b = a[1::2]
- assert repr(b) == "array([1.0, 3.0])"
- a = zeros(2002)
- b = a[::2]
- assert repr(b) == "array([0.0, 0.0, 0.0, ..., 0.0, 0.0, 0.0])"
- a = array((range(5),range(5,10)), dtype="int16")
- b=a[1,2:]
- assert repr(b) == "array([7, 8, 9], dtype=int16)"
- #This is the way cpython numpy does it - an empty slice prints its
shape
- b=a[2:1,]
- assert repr(b) == "array([], shape=(0, 5), dtype=int16)"
-
- def test_str(self):
- from numpy import array, zeros
- a = array(range(5), float)
- assert str(a) == "[0.0 1.0 2.0 3.0 4.0]"
- assert str((2*a)[:]) == "[0.0 2.0 4.0 6.0 8.0]"
- a = zeros(1001)
- assert str(a) == "[0.0 0.0 0.0 ..., 0.0 0.0 0.0]"
-
- a = array(range(5), dtype=long)
- assert str(a) == "[0 1 2 3 4]"
- a = array([True, False, True, False], dtype="?")
- assert str(a) == "[True False True False]"
-
- a = array(range(5), dtype="int8")
- assert str(a) == "[0 1 2 3 4]"
-
- a = array(range(5), dtype="int16")
- assert str(a) == "[0 1 2 3 4]"
-
- a = array((range(5),range(5,10)), dtype="int16")
- assert str(a) == "[[0 1 2 3 4],\n [5 6 7 8 9]]"
-
- a = array(3,dtype=int)
- assert str(a) == "3"
-
- def test_str_slice(self):
- from numpy import array, zeros
- a = array(range(5), float)
- b = a[1::2]
- assert str(b) == "[1.0 3.0]"
- a = zeros(2002)
- b = a[::2]
- assert str(b) == "[0.0 0.0 0.0 ..., 0.0 0.0 0.0]"
- a = array((range(5),range(5,10)), dtype="int16")
- b=a[1,2:]
- assert str(b) == "[7 8 9]"
- b=a[2:1,]
- assert str(b) == "[]"
-
def test_getitem(self):
from numpy import array
a = array(range(5))
@@ -762,3 +730,85 @@
for i in range(4):
assert a[i] == i + 1
raises(ValueError, fromstring, "abc")
+
+class AppTestRepr(BaseNumpyAppTest):
+ def test_repr(self):
+ from numpy import array, zeros
+ a = array(range(5), float)
+ assert repr(a) == "array([0.0, 1.0, 2.0, 3.0, 4.0])"
+ a = array([], float)
+ assert repr(a) == "array([], dtype=float64)"
+ a = zeros(1001)
+ assert repr(a) == "array([0.0, 0.0, 0.0, ..., 0.0, 0.0, 0.0])"
+ a = array(range(5), long)
+ assert repr(a) == "array([0, 1, 2, 3, 4])"
+ a = array([], long)
+ assert repr(a) == "array([], dtype=int64)"
+ a = array([True, False, True, False], "?")
+ assert repr(a) == "array([True, False, True, False], dtype=bool)"
+ a = zeros((3,4))
+ assert repr(a) == '''array([[0.0, 0.0, 0.0, 0.0],
+ [0.0, 0.0, 0.0, 0.0],
+ [0.0, 0.0, 0.0, 0.0]])'''
+ a = zeros((2,3,4))
+ assert repr(a) == '''array([[[0.0, 0.0, 0.0, 0.0],
+ [0.0, 0.0, 0.0, 0.0],
+ [0.0, 0.0, 0.0, 0.0]],
+
+ [[0.0, 0.0, 0.0, 0.0],
+ [0.0, 0.0, 0.0, 0.0],
+ [0.0, 0.0, 0.0, 0.0]]])'''
+
+ def test_repr_slice(self):
+ from numpy import array, zeros
+ a = array(range(5), float)
+ b = a[1::2]
+ assert repr(b) == "array([1.0, 3.0])"
+ a = zeros(2002)
+ b = a[::2]
+ assert repr(b) == "array([0.0, 0.0, 0.0, ..., 0.0, 0.0, 0.0])"
+ a = array((range(5),range(5,10)), dtype="int16")
+ b=a[1,2:]
+ assert repr(b) == "array([7, 8, 9], dtype=int16)"
+ #This is the way cpython numpy does it - an empty slice prints its
shape
+ b=a[2:1,]
+ assert repr(b) == "array([], shape=(0, 5), dtype=int16)"
+
+ def test_str(self):
+ from numpy import array, zeros
+ a = array(range(5), float)
+ assert str(a) == "[0.0 1.0 2.0 3.0 4.0]"
+ assert str((2*a)[:]) == "[0.0 2.0 4.0 6.0 8.0]"
+ a = zeros(1001)
+ assert str(a) == "[0.0 0.0 0.0 ..., 0.0 0.0 0.0]"
+
+ a = array(range(5), dtype=long)
+ assert str(a) == "[0 1 2 3 4]"
+ a = array([True, False, True, False], dtype="?")
+ assert str(a) == "[True False True False]"
+
+ a = array(range(5), dtype="int8")
+ assert str(a) == "[0 1 2 3 4]"
+
+ a = array(range(5), dtype="int16")
+ assert str(a) == "[0 1 2 3 4]"
+
+ a = array((range(5),range(5,10)), dtype="int16")
+ assert str(a) == "[[0 1 2 3 4],\n [5 6 7 8 9]]"
+
+ a = array(3,dtype=int)
+ assert str(a) == "3"
+
+ def test_str_slice(self):
+ from numpy import array, zeros
+ a = array(range(5), float)
+ b = a[1::2]
+ assert str(b) == "[1.0 3.0]"
+ a = zeros(2002)
+ b = a[::2]
+ assert str(b) == "[0.0 0.0 0.0 ..., 0.0 0.0 0.0]"
+ a = array((range(5),range(5,10)), dtype="int16")
+ b=a[1,2:]
+ assert str(b) == "[7 8 9]"
+ b=a[2:1,]
+ assert str(b) == "[]"
_______________________________________________
pypy-commit mailing list
[email protected]
http://mail.python.org/mailman/listinfo/pypy-commit