Author: Richard Plangger <[email protected]>
Branch: ppc-vsx-support
Changeset: r85191:daa1de3f481f
Date: 2016-06-16 18:01 +0200
http://bitbucket.org/pypy/pypy/changeset/daa1de3f481f/
Log: added vector add for powerpc, test modified to use hypothesis
diff --git a/rpython/jit/backend/ppc/codebuilder.py b/rpython/jit/backend/ppc/codebuilder.py
--- a/rpython/jit/backend/ppc/codebuilder.py
+++ b/rpython/jit/backend/ppc/codebuilder.py
@@ -62,6 +62,7 @@
XFX = Form("CRM", "rS", "XO1")
XLL = Form("LL", "XO1")
XX1 = Form("vrT", "rA", "rB", "XO1")
+VX = Form("lvrT", "lvrA", "lvrB", "XO8")
MI = Form("rA", "rS", "SH", "MB", "ME", "Rc")
MB = Form("rA", "rS", "rB", "MB", "ME", "Rc")
@@ -584,6 +585,9 @@
stxvd2x = XX1(31, XO1=972)
stxvw4x = XX1(31, XO1=908)
+ # integer
+ vaddudm = VX(4, XO8=192)
+
class PPCAssembler(BasicPPCAssembler, PPCVSXAssembler):
BA = BasicPPCAssembler
diff --git a/rpython/jit/backend/ppc/ppc_field.py b/rpython/jit/backend/ppc/ppc_field.py
--- a/rpython/jit/backend/ppc/ppc_field.py
+++ b/rpython/jit/backend/ppc/ppc_field.py
@@ -44,6 +44,13 @@
"TO": ( 6, 10),
"UIMM": (16, 31),
"vrT": (6, 31, 'unsigned', regname._V, 'overlap'),
+ # low vector register T (low in a sense:
+ # can only address 32 vector registers)
+ "lvrT": (6, 10, 'unsigned', regname._V),
+ # low vector register A
+ "lvrA": (11, 15, 'unsigned', regname._V),
+ # low vector register B
+ "lvrB": (16, 20, 'unsigned', regname._V),
"XO1": (21, 30),
"XO2": (22, 30),
"XO3": (26, 30),
@@ -51,6 +58,7 @@
"XO5": (27, 29),
"XO6": (21, 29),
"XO7": (27, 30),
+ "XO8": (21, 31),
"LL": ( 9, 10),
}
@@ -102,16 +110,16 @@
value = super(sh, self).decode(inst)
return (value & 32) << 5 | (value >> 10 & 31)
-class tx(Field):
- def encode(self, value):
- value = (value & 31) << 20 | (value & 32) >> 5
- return super(tx, self).encode(value)
- def decode(self, inst):
- value = super(tx, self).decode(inst)
- return (value & 32) << 5 | (value >> 20 & 31)
- def r(self):
- import pdb; pdb.set_trace()
- return super(tx, self).r()
+# ??? class tx(Field):
+# ??? def encode(self, value):
+# ??? value = (value & 31) << 20 | (value & 32) >> 5
+# ??? return super(tx, self).encode(value)
+# ??? def decode(self, inst):
+# ??? value = super(tx, self).decode(inst)
+# ??? return (value & 32) << 5 | (value >> 20 & 31)
+# ??? def r(self):
+# ??? import pdb; pdb.set_trace()
+# ??? return super(tx, self).r()
# other special fields?
ppc_fields = {
@@ -121,7 +129,7 @@
"mbe": mbe("mbe", *fields["mbe"]),
"sh": sh("sh", *fields["sh"]),
"spr": spr("spr", *fields["spr"]),
- "vrT": tx("vrT", *fields["vrT"]),
+ # ??? "vrT": tx("vrT", *fields["vrT"]),
}
for f in fields:
diff --git a/rpython/jit/backend/ppc/rassemblermaker.py b/rpython/jit/backend/ppc/rassemblermaker.py
--- a/rpython/jit/backend/ppc/rassemblermaker.py
+++ b/rpython/jit/backend/ppc/rassemblermaker.py
@@ -47,7 +47,7 @@
body.append('sh1 = (%s & 31) << 10 | (%s & 32) >> 5' % (value, value))
value = 'sh1'
elif field.name == 'vrT':
- body.append('vrT1 = (%s & 31) << 20 | (%s & 32) >> 5' % (value, value))
+ body.append('vrT1 = (%s & 31) << 21 | (%s & 32) >> 5' % (value, value))
value = 'vrT1'
if isinstance(field, IField):
body.append('v |= ((%3s >> 2) & r_uint(%#05x)) << 2' % (value, field.mask))
diff --git a/rpython/jit/backend/ppc/regalloc.py b/rpython/jit/backend/ppc/regalloc.py
--- a/rpython/jit/backend/ppc/regalloc.py
+++ b/rpython/jit/backend/ppc/regalloc.py
@@ -256,6 +256,8 @@
if var is not None:
if var.type == FLOAT:
self.fprm.possibly_free_var(var)
+ elif var.is_vector() and var.type != VOID:
+ self.vrm.possibly_free_var(var)
else:
self.rm.possibly_free_var(var)
@@ -309,10 +311,10 @@
#
for j in range(op.numargs()):
box = op.getarg(j)
- if box.type != FLOAT:
+ if box.is_vector():
+ self.vrm.temp_boxes.append(box)
+ elif box.type != FLOAT:
self.rm.temp_boxes.append(box)
- elif box.is_vector():
- self.vrm.temp_boxes.append(box)
else:
self.fprm.temp_boxes.append(box)
#
@@ -436,6 +438,7 @@
# temporary boxes and all the current operation's arguments
self.rm.free_temp_vars()
self.fprm.free_temp_vars()
+ self.vrm.free_temp_vars()
# ******************************************************
# * P R E P A R E O P E R A T I O N S *
diff --git a/rpython/jit/backend/ppc/vector_ext.py b/rpython/jit/backend/ppc/vector_ext.py
--- a/rpython/jit/backend/ppc/vector_ext.py
+++ b/rpython/jit/backend/ppc/vector_ext.py
@@ -49,9 +49,9 @@
def _vec_load(self, resloc, baseloc, indexloc, integer, itemsize, aligned):
if integer:
if itemsize == 4:
+ self.mc.lxvw4x(resloc.value, indexloc.value, baseloc.value)
+ elif itemsize == 8:
self.mc.lxvd2x(resloc.value, indexloc.value, baseloc.value)
- elif itemsize == 8:
- self.mc.lxvw4x(resloc.value, indexloc.value, baseloc.value)
else:
raise NotImplementedError
else:
@@ -62,6 +62,48 @@
else:
raise NotImplementedError
+ def _emit_vec_setitem(self, op, arglocs, regalloc):
+ # prepares item scale (raw_store does not)
+ base_loc, ofs_loc, value_loc, size_loc, baseofs, integer_loc, aligned_loc = arglocs
+ scale = get_scale(size_loc.value)
+ dest_loc = addr_add(base_loc, ofs_loc, baseofs.value, scale)
+ self._vec_store(dest_loc, value_loc, integer_loc.value,
+ size_loc.value, aligned_loc.value)
+
+ genop_discard_vec_setarrayitem_raw = _emit_vec_setitem
+ genop_discard_vec_setarrayitem_gc = _emit_vec_setitem
+
+ def emit_vec_raw_store(self, op, arglocs, regalloc):
+ baseloc, ofsloc, valueloc, size_loc, baseofs, \
+ integer_loc, aligned_loc = arglocs
+ #dest_loc = addr_add(base_loc, ofs_loc, baseofs.value, 0)
+ assert baseofs.value == 0
+ self._vec_store(baseloc, ofsloc, valueloc, integer_loc.value,
+ size_loc.value, aligned_loc.value)
+
+ def _vec_store(self, baseloc, indexloc, valueloc, integer, itemsize, aligned):
+ if integer:
+ if itemsize == 4:
+ self.mc.stxvw4x(valueloc.value, indexloc.value, baseloc.value)
+ elif itemsize == 8:
+ self.mc.stxvd2x(valueloc.value, indexloc.value, baseloc.value)
+ else:
+ raise NotImplementedError
+ else:
+ raise NotImplementedError
+
+
+ def emit_vec_int_add(self, op, arglocs, regalloc):
+ resloc, loc0, loc1, size_loc = arglocs
+ size = size_loc.value
+ if size == 1:
+ raise NotImplementedError
+ elif size == 2:
+ raise NotImplementedError
+ elif size == 4:
+ raise NotImplementedError
+ elif size == 8:
+ self.mc.vaddudm(resloc.value, loc0.value, loc1.value)
#def genop_guard_vec_guard_true(self, guard_op, guard_token, locs, resloc):
# self.implement_guard(guard_token)
@@ -167,35 +209,6 @@
# not_implemented("reduce sum for %s not impl." % arg)
- #def _genop_discard_vec_setarrayitem(self, op, arglocs):
- # # prepares item scale (raw_store does not)
- # base_loc, ofs_loc, value_loc, size_loc, baseofs, integer_loc, aligned_loc = arglocs
- # scale = get_scale(size_loc.value)
- # dest_loc = addr_add(base_loc, ofs_loc, baseofs.value, scale)
- # self._vec_store(dest_loc, value_loc, integer_loc.value,
- # size_loc.value, aligned_loc.value)
-
- #genop_discard_vec_setarrayitem_raw = _genop_discard_vec_setarrayitem
- #genop_discard_vec_setarrayitem_gc = _genop_discard_vec_setarrayitem
-
- #def genop_discard_vec_raw_store(self, op, arglocs):
- # base_loc, ofs_loc, value_loc, size_loc, baseofs, integer_loc, aligned_loc = arglocs
- # dest_loc = addr_add(base_loc, ofs_loc, baseofs.value, 0)
- # self._vec_store(dest_loc, value_loc, integer_loc.value,
- # size_loc.value, aligned_loc.value)
-
- #def _vec_store(self, dest_loc, value_loc, integer, itemsize, aligned):
- # if integer:
- # if aligned:
- # self.mc.MOVDQA(dest_loc, value_loc)
- # else:
- # self.mc.MOVDQU(dest_loc, value_loc)
- # else:
- # if itemsize == 4:
- # self.mc.MOVUPS(dest_loc, value_loc)
- # elif itemsize == 8:
- # self.mc.MOVUPD(dest_loc, value_loc)
-
#def genop_vec_int_is_true(self, op, arglocs, resloc):
# loc, sizeloc = arglocs
# temp = X86_64_XMM_SCRATCH_REG
@@ -219,18 +232,6 @@
# # There is no 64x64 bit packed mul. For 8 bit either. It is questionable if it gives any benefit?
# not_implemented("int8/64 mul")
- #def genop_vec_int_add(self, op, arglocs, resloc):
- # loc0, loc1, size_loc = arglocs
- # size = size_loc.value
- # if size == 1:
- # self.mc.PADDB(loc0, loc1)
- # elif size == 2:
- # self.mc.PADDW(loc0, loc1)
- # elif size == 4:
- # self.mc.PADDD(loc0, loc1)
- # elif size == 8:
- # self.mc.PADDQ(loc0, loc1)
-
#def genop_vec_int_sub(self, op, arglocs, resloc):
# loc0, loc1, size_loc = arglocs
# size = size_loc.value
@@ -525,6 +526,11 @@
forbidden_vars = self.vrm.temp_boxes
return self.vrm.force_allocate_reg(op, forbidden_vars)
+ def ensure_vector_reg(self, box):
+ loc = self.vrm.make_sure_var_in_reg(box,
+ forbidden_vars=self.vrm.temp_boxes)
+ return loc
+
def _prepare_load(self, op):
descr = op.getdescr()
assert isinstance(descr, ArrayDescr)
@@ -549,43 +555,49 @@
prepare_vec_raw_load_i = _prepare_load
prepare_vec_raw_load_f = _prepare_load
- #def _prepare_vec_setarrayitem(self, op):
- # descr = op.getdescr()
- # assert isinstance(descr, ArrayDescr)
- # assert not descr.is_array_of_pointers() and \
- # not descr.is_array_of_structs()
- # itemsize, ofs, _ = unpack_arraydescr(descr)
- # args = op.getarglist()
- # base_loc = self.rm.make_sure_var_in_reg(op.getarg(0), args)
- # value_loc = self.make_sure_var_in_reg(op.getarg(2), args)
- # ofs_loc = self.rm.make_sure_var_in_reg(op.getarg(1), args)
+ def prepare_vec_arith(self, op):
+ a0 = op.getarg(0)
+ a1 = op.getarg(1)
+ assert isinstance(op, VectorOp)
+ size = op.bytesize
+ args = op.getarglist()
+ loc0 = self.ensure_vector_reg(a0)
+ loc1 = self.ensure_vector_reg(a1)
+ resloc = self.force_allocate_vector_reg(op)
+ return [resloc, loc0, loc1, imm(size)]
- # integer = not (descr.is_array_of_floats() or descr.getconcrete_type() == FLOAT)
- # aligned = False
- # self.perform_discard(op, [base_loc, ofs_loc, value_loc,
- # imm(itemsize), imm(ofs), imm(integer), imm(aligned)])
-
- #prepare_vec_setarrayitem_raw = _prepare_vec_setarrayitem
- #prepare_vec_setarrayitem_gc = _prepare_vec_setarrayitem
- #prepare_vec_raw_store = _prepare_vec_setarrayitem
-
- #def prepare_vec_arith(self, op):
- # lhs = op.getarg(0)
- # assert isinstance(op, VectorOp)
- # size = op.bytesize
- # args = op.getarglist()
- # loc1 = self.make_sure_var_in_reg(op.getarg(1), args)
- # loc0 = self.xrm.force_result_in_reg(op, op.getarg(0), args)
- # self.perform(op, [loc0, loc1, imm(size)], loc0)
-
- #prepare_vec_int_add = prepare_vec_arith
+ prepare_vec_int_add = prepare_vec_arith
#prepare_vec_int_sub = prepare_vec_arith
#prepare_vec_int_mul = prepare_vec_arith
#prepare_vec_float_add = prepare_vec_arith
#prepare_vec_float_sub = prepare_vec_arith
#prepare_vec_float_mul = prepare_vec_arith
#prepare_vec_float_truediv = prepare_vec_arith
- #del prepare_vec_arith
+ del prepare_vec_arith
+
+ def _prepare_vec_store(self, op):
+ descr = op.getdescr()
+ assert isinstance(descr, ArrayDescr)
+ assert not descr.is_array_of_pointers() and \
+ not descr.is_array_of_structs()
+ itemsize, ofs, _ = unpack_arraydescr(descr)
+ a0 = op.getarg(0)
+ a1 = op.getarg(1)
+ a2 = op.getarg(2)
+ baseloc = self.ensure_reg(a0)
+ ofsloc = self.ensure_reg(a1)
+ valueloc = self.ensure_vector_reg(a2)
+
+ integer = not (descr.is_array_of_floats() or descr.getconcrete_type() == FLOAT)
+ aligned = False
+ return [baseloc, ofsloc, valueloc,
+ imm(itemsize), imm(ofs), imm(integer), imm(aligned)]
+
+ prepare_vec_setarrayitem_raw = _prepare_vec_store
+ prepare_vec_setarrayitem_gc = _prepare_vec_store
+ prepare_vec_raw_store = _prepare_vec_store
+ del _prepare_vec_store
+
#def prepare_vec_arith_unary(self, op):
# lhs = op.getarg(0)
diff --git a/rpython/jit/metainterp/test/test_vector.py b/rpython/jit/metainterp/test/test_vector.py
--- a/rpython/jit/metainterp/test/test_vector.py
+++ b/rpython/jit/metainterp/test/test_vector.py
@@ -1,5 +1,6 @@
import py
+from hypothesis import given, note, strategies as st
from rpython.jit.metainterp.warmspot import ll_meta_interp, get_stats
from rpython.jit.metainterp.test.support import LLJitMixin
from rpython.jit.codewriter.policy import StopAtXPolicy
@@ -23,6 +24,8 @@
def free(mem):
lltype.free(mem, flavor='raw')
+integers_64bit = st.integers(min_value=-2**63, max_value=2**63-1)
+
class VectorizeTests:
enable_opts = 'intbounds:rewrite:virtualize:string:earlyforce:pure:heap:unroll'
@@ -37,21 +40,26 @@
type_system=self.type_system,
vec=vec, vec_all=vec_all)
- @py.test.mark.parametrize('i',[3,4,5,6,7,8,9,50])
- def test_vectorize_simple_load_arith_store_int_add_index(self,i):
+ @given(st.lists(integers_64bit, min_size=5, max_size=50),
+ st.lists(integers_64bit, min_size=5, max_size=50))
+ def test_vector_simple(self, la, lb):
myjitdriver = JitDriver(greens = [],
reds = 'auto',
vectorize=True)
+ i = min(len(la), len(lb))
+ la = la[:i]
+ lb = lb[:i]
+ bc = i*rffi.sizeof(rffi.SIGNED)
+ vc = alloc_raw_storage(bc, zero=True)
+ size = rffi.sizeof(rffi.SIGNED)
def f(d):
- bc = d*rffi.sizeof(rffi.SIGNED)
va = alloc_raw_storage(bc, zero=True)
vb = alloc_raw_storage(bc, zero=True)
- vc = alloc_raw_storage(bc, zero=True)
x = 1
for i in range(d):
- j = i*rffi.sizeof(rffi.SIGNED)
- raw_storage_setitem(va, j, rffi.cast(rffi.SIGNED,i))
- raw_storage_setitem(vb, j, rffi.cast(rffi.SIGNED,i))
+ j = i*size
+ raw_storage_setitem(va, j, rffi.cast(rffi.SIGNED,la[i]))
+ raw_storage_setitem(vb, j, rffi.cast(rffi.SIGNED,lb[i]))
i = 0
while i < bc:
myjitdriver.jit_merge_point()
@@ -59,17 +67,15 @@
b = raw_storage_getitem(rffi.SIGNED,vb,i)
c = a+b
raw_storage_setitem(vc, i, rffi.cast(rffi.SIGNED,c))
- i += 1*rffi.sizeof(rffi.SIGNED)
- res = 0
- for i in range(d):
- res += raw_storage_getitem(rffi.SIGNED,vc,i*rffi.sizeof(rffi.SIGNED))
+ i += 1*size
free_raw_storage(va)
free_raw_storage(vb)
- free_raw_storage(vc)
- return res
- res = self.meta_interp(f, [i])
- assert res == f(i)
+ self.meta_interp(f, [i])
+ for p in range(i):
+ c = raw_storage_getitem(rffi.SIGNED,vc,p*size)
+ assert intmask(la[p] + lb[p]) == c
+ free_raw_storage(vc)
@py.test.mark.parametrize('i',[1,2,3,8,17,128,130,131,142,143])
def test_vectorize_array_get_set(self,i):
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit