Author: Richard Plangger <[email protected]>
Branch: vecopt2
Changeset: r77104:df7310e361b1
Date: 2015-04-01 14:35 +0200
http://bitbucket.org/pypy/pypy/changeset/df7310e361b1/
Log: memory references now correctly track whether their bytes overlap
(previously the cell stride was ignored)
diff --git a/rpython/jit/metainterp/optimizeopt/dependency.py
b/rpython/jit/metainterp/optimizeopt/dependency.py
--- a/rpython/jit/metainterp/optimizeopt/dependency.py
+++ b/rpython/jit/metainterp/optimizeopt/dependency.py
@@ -190,12 +190,13 @@
pass
# handle fail args
op = self.operations[guard_idx]
- for arg in op.getfailargs():
- try:
- def_idx = tracker.definition_index(arg)
- self._put_edge(def_idx, guard_idx, arg)
- except KeyError:
- assert False
+ if op.getfailargs():
+ for arg in op.getfailargs():
+ try:
+ def_idx = tracker.definition_index(arg)
+ self._put_edge(def_idx, guard_idx, arg)
+ except KeyError:
+ assert False
#
# guards check overflow or raise are directly dependent
# find the first non guard operation
@@ -613,6 +614,7 @@
will result in the linear combination i0 * (2/1) + 2
"""
def __init__(self, array, origin, descr):
+ assert descr is not None
self.array = array
self.origin = origin
self.descr = descr
@@ -623,15 +625,21 @@
def is_adjacent_to(self, other):
""" this is a symmetric relation """
match, off = self.calc_difference(other)
- if match:
- return off == 1 or off == -1
+ stride = self.stride()
+ if match and stride != 0:
+ return abs(off) - stride == 0
return False
+ def stride(self):
+ """ the stride in bytes """
+ return self.descr.get_item_size_in_bytes()
+
def is_adjacent_after(self, other):
""" the asymetric relation to is_adjacent_to """
match, off = self.calc_difference(other)
- if match:
- return off == 1
+ stride = self.stride()
+ if match and stride != 0:
+ return off == stride # must be equal to the positive stride
return False
def indices_can_alias(self, other):
@@ -641,7 +649,7 @@
"""
match, off = self.calc_difference(other)
if match:
- return off == 0
+ return abs(off) < self.stride()
return False
def __eq__(self, other):
@@ -658,6 +666,7 @@
return self.array == other.array
def calc_difference(self, other):
+ """ calculates the difference in bytes as second return value """
assert isinstance(other, MemoryRef)
if self.array == other.array \
and self.origin == other.origin:
@@ -670,4 +679,3 @@
def __repr__(self):
return 'MemoryRef(%s*(%s/%s)+%s)' % (self.origin, self.coefficient_mul,
self.coefficient_div,
self.constant)
-
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_dependency.py
b/rpython/jit/metainterp/optimizeopt/test/test_dependency.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_dependency.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_dependency.py
@@ -297,9 +297,9 @@
def test_setarrayitem_dont_depend_with_memref_info(self):
ops="""
[p0, i1] # 0: 1,2,3,4
- setarrayitem_raw(p0, i1, 1, descr=floatarraydescr) # 1: 4
+ setarrayitem_raw(p0, i1, 1, descr=chararraydescr) # 1: 4
i2 = int_add(i1,1) # 2: 3
- setarrayitem_raw(p0, i2, 2, descr=floatarraydescr) # 3: 4
+ setarrayitem_raw(p0, i2, 2, descr=chararraydescr) # 3: 4
jump(p0, i1) # 4:
"""
self.assert_dependencies(ops, memref=True, full_check=True)
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_util.py
b/rpython/jit/metainterp/optimizeopt/test/test_util.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_util.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_util.py
@@ -154,7 +154,10 @@
arraydescr = cpu.arraydescrof(lltype.GcArray(lltype.Signed))
floatarraydescr = cpu.arraydescrof(lltype.GcArray(lltype.Float))
+ intarraydescr = cpu.arraydescrof(lltype.GcArray(lltype.Signed))
+ uintarraydescr = cpu.arraydescrof(lltype.GcArray(lltype.Unsigned))
chararraydescr = cpu.arraydescrof(lltype.GcArray(lltype.Char))
+ singlefloatarraydescr =
cpu.arraydescrof(lltype.GcArray(lltype.SingleFloat))
# a GcStruct not inheriting from OBJECT
S = lltype.GcStruct('TUPLE', ('a', lltype.Signed), ('b', lltype.Ptr(NODE)))
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
@@ -647,7 +647,7 @@
def test_packset_init_raw_load_not_adjacent_and_adjacent(self):
ops = """
[p0,i0]
- i3 = raw_load(p0, i0, descr=floatarraydescr)
+ i3 = raw_load(p0, i0, descr=chararraydescr)
jump(p0,i0)
"""
loop = self.parse_loop(ops)
@@ -657,12 +657,13 @@
ops = """
[p0,i0]
i2 = int_add(i0,1)
- raw_load(p0, i2, descr=floatarraydescr)
+ raw_load(p0, i2, descr=chararraydescr)
jump(p0,i2)
"""
loop = self.parse_loop(ops)
vopt = self.init_packset(loop,3)
assert len(vopt.vec_info.memory_refs) == 4
+ print vopt.packset.packs
assert len(vopt.packset.packs) == 3
for i in range(3):
x = (i+1)*2
@@ -765,28 +766,32 @@
self.assert_packset_empty(vopt.packset, len(loop.operations),
[(5,11), (4,10), (6,12)])
- def test_packset_combine_simple(self):
+ @pytest.mark.parametrize("descr,stride",
+ [('char',1),('float',8),('int',8),('singlefloat',4)])
+ def test_packset_combine_simple(self,descr,stride):
ops = """
[p0,i0]
- i3 = getarrayitem_gc(p0, i0, descr=floatarraydescr)
- i1 = int_add(i0,1)
+ i3 = getarrayitem_gc(p0, i0, descr={descr}arraydescr)
+ i1 = int_add(i0,{stride})
jump(p0,i1)
- """
+ """.format(descr=descr,stride=stride)
loop = self.parse_loop(ops)
vopt = self.combine_packset(loop,3)
assert len(vopt.vec_info.memory_refs) == 4
assert len(vopt.packset.packs) == 1
self.assert_pack(vopt.packset.packs[0], (1,3,5,7))
- def test_packset_combine_2_loads_in_trace(self):
+ @pytest.mark.parametrize("descr,stride",
+ [('char',1),('float',8),('int',8),('singlefloat',4)])
+ def test_packset_combine_2_loads_in_trace(self, descr, stride):
ops = """
[p0,i0]
- i3 = getarrayitem_gc(p0, i0, descr=floatarraydescr)
- i1 = int_add(i0,1)
- i4 = getarrayitem_gc(p0, i1, descr=floatarraydescr)
- i2 = int_add(i1,1)
+ i3 = getarrayitem_gc(p0, i0, descr={type}arraydescr)
+ i1 = int_add(i0,{stride})
+ i4 = getarrayitem_gc(p0, i1, descr={type}arraydescr)
+ i2 = int_add(i1,{stride})
jump(p0,i2)
- """
+ """.format(type=descr,stride=stride)
loop = self.parse_loop(ops)
vopt = self.combine_packset(loop,3)
assert len(vopt.vec_info.memory_refs) == 8
@@ -831,59 +836,140 @@
except NotAVectorizeableLoop:
pass
- def test_packset_vector_operation(self):
- for op in ['int_add', 'int_sub', 'int_mul']:
- ops = """
- [p0,p1,p2,i0]
- i1 = int_add(i0, 1)
- i10 = int_le(i1, 128)
- guard_true(i10) []
- i2 = getarrayitem_gc(p0, i0, descr=floatarraydescr)
- i3 = getarrayitem_gc(p1, i0, descr=floatarraydescr)
- i4 = {op}(i2,i3)
- setarrayitem_gc(p2, i0, i4, descr=floatarraydescr)
- jump(p0,p1,p2,i1)
- """.format(op=op)
- loop = self.parse_loop(ops)
- vopt = self.combine_packset(loop,3)
- assert len(vopt.vec_info.memory_refs) == 12
- assert len(vopt.packset.packs) == 4
+ @pytest.mark.parametrize("op,descr,stride",
+ [('int_add','char',1),
+ ('int_sub','char',1),
+ ('int_mul','char',1),
+ ('float_add','float',8),
+ ('float_sub','float',8),
+ ('float_mul','float',8),
+ ('float_add','singlefloat',4),
+ ('float_sub','singlefloat',4),
+ ('float_mul','singlefloat',4),
+ ('int_add','int',8),
+ ('int_sub','int',8),
+ ('int_mul','int',8),
+ ])
+ def test_packset_vector_operation(self, op, descr, stride):
+ ops = """
+ [p0,p1,p2,i0]
+ i1 = int_add(i0, {stride})
+ i10 = int_le(i1, 128)
+ guard_true(i10) []
+ i2 = getarrayitem_gc(p0, i0, descr={descr}arraydescr)
+ i3 = getarrayitem_gc(p1, i0, descr={descr}arraydescr)
+ i4 = {op}(i2,i3)
+ setarrayitem_gc(p2, i0, i4, descr={descr}arraydescr)
+ jump(p0,p1,p2,i1)
+ """.format(op=op,descr=descr,stride=stride)
+ loop = self.parse_loop(ops)
+ vopt = self.combine_packset(loop,3)
+ assert len(vopt.vec_info.memory_refs) == 12
+ assert len(vopt.packset.packs) == 4
- for opindices in [(4,11,18,25),(5,12,19,26),
- (6,13,20,27),(7,14,21,28)]:
- self.assert_has_pack_with(vopt.packset, opindices)
+ for opindices in [(4,11,18,25),(5,12,19,26),
+ (6,13,20,27),(7,14,21,28)]:
+ self.assert_has_pack_with(vopt.packset, opindices)
- @pytest.mark.parametrize('op',
['int_mul','int_add','int_sub','float_mul','float_add','float_sub'])
- def test_schedule_vector_operation(self, op):
+ @pytest.mark.parametrize('op,descr,stride',
+ [('int_add','char',1),
+ ('int_sub','char',1),
+ ('int_mul','char',1),
+ ('float_add','float',8),
+ ('float_sub','float',8),
+ ('float_mul','float',8),
+ ('float_add','singlefloat',4),
+ ('float_sub','singlefloat',4),
+ ('float_mul','singlefloat',4),
+ ('int_add','int',8),
+ ('int_sub','int',8),
+ ('int_mul','int',8),
+ ])
+ def test_schedule_vector_operation(self, op, descr, stride):
ops = """
[p0,p1,p2,i0] # 0
i10 = int_le(i0, 128) # 1, 8, 15, 22
guard_true(i10) [p0,p1,p2,i0] # 2, 9, 16, 23
- i2 = getarrayitem_gc(p0, i0, descr=floatarraydescr) # 3, 10, 17, 24
- i3 = getarrayitem_gc(p1, i0, descr=floatarraydescr) # 4, 11, 18, 25
+ i2 = getarrayitem_gc(p0, i0, descr={descr}arraydescr) # 3, 10, 17, 24
+ i3 = getarrayitem_gc(p1, i0, descr={descr}arraydescr) # 4, 11, 18, 25
i4 = {op}(i2,i3) # 5, 12, 19, 26
- setarrayitem_gc(p2, i0, i4, descr=floatarraydescr) # 6, 13, 20, 27
- i1 = int_add(i0, 1) # 7, 14, 21, 28
+ setarrayitem_gc(p2, i0, i4, descr={descr}arraydescr) # 6, 13, 20, 27
+ i1 = int_add(i0, {stride}) # 7, 14, 21, 28
jump(p0,p1,p2,i1) # 29
- """.format(op=op)
+ """.format(op=op,descr=descr,stride=stride)
vops = """
[p0,p1,p2,i0]
i10 = int_le(i0, 128)
guard_true(i10) [p0,p1,p2,i0]
- i1 = int_add(i0, 1)
+ i1 = int_add(i0, {stride})
i11 = int_le(i1, 128)
guard_true(i11) [p0,p1,p2,i0]
- v1 = vec_raw_load(p0, i0, 2, descr=floatarraydescr)
- v2 = vec_raw_load(p1, i0, 2, descr=floatarraydescr)
- i12 = int_add(i1, 1)
+ v1 = vec_raw_load(p0, i0, 2, descr={descr}arraydescr)
+ v2 = vec_raw_load(p1, i0, 2, descr={descr}arraydescr)
+ i12 = int_add(i1, {stride})
v3 = {op}(v1,v2)
- vec_raw_store(p2, i0, v3, 2, descr=floatarraydescr)
+ vec_raw_store(p2, i0, v3, 2, descr={descr}arraydescr)
jump(p0,p1,p2,i12)
- """.format(op='vec_'+op)
+ """.format(op='vec_'+op,descr=descr,stride=stride)
loop = self.parse_loop(ops)
vopt = self.schedule(loop,1)
self.debug_print_operations(vopt.loop)
self.assert_equal(loop, self.parse_loop(vops))
+ @pytest.mark.parametrize('unroll', range(1,16,2))
+ def test_vectorize_index_variable_combination(self, unroll):
+ pytest.skip("implement index variable combination")
+ ops = """
+ [p0,i0]
+ i1 = raw_load(p0, i0, descr=floatarraydescr)
+ i2 = int_add(i0,1)
+ jump(p0,i2)
+ """
+ vops = """
+ [p0,i0]
+ v1 = vec_raw_load(p0, i0, {count}, descr=floatarraydescr)
+ i1 = int_add(i0,{count})
+ jump(p0,i1)
+ """.format(count=unroll+1)
+ loop = self.parse_loop(ops)
+ vopt = self.schedule(loop,unroll)
+ self.assert_equal(loop, self.parse_loop(vops))
+
+
+ def test_vectorize_raw_load_mul_index(self):
+ ops = """
+ [i0, i1, i2, i3, i4, i5, i6, i7]
+ i9 = int_mul(i0, 8)
+ i10 = raw_load(i3, i9, descr=intarraydescr)
+ i11 = int_mul(i0, 8)
+ i12 = raw_load(i3, i11, descr=intarraydescr)
+ i13 = int_add(i10, i12)
+ i14 = int_mul(i0, 8)
+ raw_store(i5, i14, i13, descr=intarraydescr)
+ i16 = int_add(i0, 1)
+ i17 = int_lt(i16, i7)
+ guard_true(i17) [i7, i13, i5, i4, i3, i12, i10, i16]
+ guard_future_condition() []
+ jump(i16, i10, i12, i3, i4, i5, i13, i7)
+ """
+ vopt = self.schedule(self.parse_loop(ops),1)
+
+ def test_vectorize_raw_load_add_index_item_byte_size(self):
+ ops = """
+ [i0, i1, i2, i3, i4, i5, i6, i7]
+ i8 = raw_load(i3, i0, descr=intarraydescr)
+ i9 = raw_load(i3, i0, descr=intarraydescr)
+ i10 = int_add(i8, i9)
+ raw_store(i5, i0, i10, descr=intarraydescr)
+ i12 = int_add(i0, 8)
+ i14 = int_mul(i7, 8)
+ i15 = int_lt(i12, i14)
+ guard_true(i15) [i7, i10, i5, i4, i3, i9, i8, i12]
+ guard_future_condition() []
+ jump(i12, i8, i9, i3, i4, i5, i10, i7)
+ """
+ vopt = self.schedule(self.parse_loop(ops),1)
+
+
class TestLLtype(BaseTestVectorize, LLtypeMixin):
pass
diff --git a/rpython/jit/metainterp/optimizeopt/vectorize.py
b/rpython/jit/metainterp/optimizeopt/vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/vectorize.py
@@ -221,7 +221,7 @@
if self.packset.can_be_packed(a_opidx, b_opidx,
a_memref, b_memref):
self.packset.add_pair(a_opidx, b_opidx,
- a_memref, b_memref)
+ a_memref, b_memref)
def extend_packset(self):
pack_count = self.packset.pack_count()
diff --git a/rpython/jit/metainterp/test/test_vectorize.py
b/rpython/jit/metainterp/test/test_vectorize.py
--- a/rpython/jit/metainterp/test/test_vectorize.py
+++ b/rpython/jit/metainterp/test/test_vectorize.py
@@ -21,27 +21,42 @@
CPUClass=self.CPUClass,
type_system=self.type_system)
- def test_simple_raw_load(self):
+ def test_vectorize_simple_load_arith_store(self):
myjitdriver = JitDriver(greens = [],
- reds = ['i', 'res', 'va','c'],
+ reds = ['i','a','b','va','vb','vc','c','d'],
vectorize=True)
- def f(c):
- res = 0
- va = alloc_raw_storage(c*rffi.sizeof(rffi.SIGNED), zero=True)
- for i in range(c):
+ def f(d):
+ va = alloc_raw_storage(d*rffi.sizeof(rffi.SIGNED), zero=True)
+ vb = alloc_raw_storage(d*rffi.sizeof(rffi.SIGNED), zero=True)
+ vc = alloc_raw_storage(d*rffi.sizeof(rffi.SIGNED), zero=True)
+ for i in range(d):
raw_storage_setitem(va, i*rffi.sizeof(rffi.SIGNED),
rffi.cast(rffi.SIGNED,i))
+ raw_storage_setitem(vb, i*rffi.sizeof(rffi.SIGNED),
+ rffi.cast(rffi.SIGNED,i))
i = 0
- while i < c:
- myjitdriver.can_enter_jit(i=i, res=res, va=va, c=c)
- myjitdriver.jit_merge_point(i=i, res=res, va=va, c=c)
- res +=
raw_storage_getitem(rffi.SIGNED,va,i*rffi.sizeof(rffi.SIGNED))
+ a = 0
+ b = 0
+ c = 0
+ while i < d:
+ myjitdriver.can_enter_jit(i=i, a=a, b=b, va=va, vb=vb, vc=vc,
d=d, c=c)
+ myjitdriver.jit_merge_point(i=i, a=a, b=b, va=va, vb=vb,
vc=vc, d=d, c=c)
+ a =
raw_storage_getitem(rffi.SIGNED,va,i*rffi.sizeof(rffi.SIGNED))
+ b =
raw_storage_getitem(rffi.SIGNED,va,i*rffi.sizeof(rffi.SIGNED))
+ c = a+b
+ raw_storage_setitem(vc, i*rffi.sizeof(rffi.SIGNED),
rffi.cast(rffi.SIGNED,c))
i += 1
+ res = 0
+ for i in range(d):
+ res +=
raw_storage_getitem(rffi.SIGNED,vc,i*rffi.sizeof(rffi.SIGNED))
+
free_raw_storage(va)
+ free_raw_storage(vb)
+ free_raw_storage(vc)
return res
i = 32
res = self.meta_interp(f, [i])
- assert res == sum(range(i))
+ assert res == sum(range(i)) + sum(range(i))
self.check_trace_count(1)
class TestLLtype(VectorizeTest, LLJitMixin):
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit