Author: Richard Plangger <[email protected]>
Branch: ppc-vsx-support
Changeset: r85435:a404127ebe68
Date: 2016-06-28 17:14 +0200
http://bitbucket.org/pypy/pypy/changeset/a404127ebe68/
Log: add the backend functions for vec_int_eq/ne + test, implement
vec_expand_f, test passes that uses vec_expand_f
diff --git a/rpython/jit/backend/llsupport/rewrite.py
b/rpython/jit/backend/llsupport/rewrite.py
--- a/rpython/jit/backend/llsupport/rewrite.py
+++ b/rpython/jit/backend/llsupport/rewrite.py
@@ -160,7 +160,7 @@
factor, offset):
factor, offset, new_index_box = cpu_simplify_scale(self.cpu,
index_box, factor, offset)
if index_box is not new_index_box:
- self.emit_op(index_box)
+ self.emit_op(new_index_box)
return factor, offset, new_index_box
def emit_gc_load_or_indexed(self, op, ptr_box, index_box, itemsize,
diff --git a/rpython/jit/backend/llsupport/vector_ext.py
b/rpython/jit/backend/llsupport/vector_ext.py
--- a/rpython/jit/backend/llsupport/vector_ext.py
+++ b/rpython/jit/backend/llsupport/vector_ext.py
@@ -1,3 +1,4 @@
+from rpython.jit.backend.llsupport.rewrite import cpu_simplify_scale
from rpython.jit.backend.llsupport.descr import (unpack_arraydescr,
unpack_fielddescr, unpack_interiorfielddescr)
from rpython.rlib.objectmodel import specialize, always_inline
@@ -130,7 +131,7 @@
if opnum in (rop.SETARRAYITEM_GC, rop.SETARRAYITEM_RAW):
itemsize, basesize, _ = unpack_arraydescr(op.getdescr())
index_box = op.getarg(1)
- _, _, changed = cpu_simplify_scale(index_box, itemsize, basesize)
+ _, _, changed = cpu_simplify_scale(state.cpu, index_box, itemsize,
basesize)
if changed is not index_box:
state.oplist.append(changed)
op.setarg(1, changed)
diff --git a/rpython/jit/backend/ppc/vector_ext.py
b/rpython/jit/backend/ppc/vector_ext.py
--- a/rpython/jit/backend/ppc/vector_ext.py
+++ b/rpython/jit/backend/ppc/vector_ext.py
@@ -16,8 +16,10 @@
from rpython.jit.backend.ppc.arch import PARAM_SAVE_AREA_OFFSET
import rpython.jit.backend.ppc.register as r
import rpython.jit.backend.ppc.condition as c
+import rpython.jit.backend.ppc.locations as l
from rpython.jit.backend.llsupport.asmmemmgr import MachineDataBlockWrapper
from rpython.rtyper.lltypesystem import lltype, rffi
+from rpython.jit.codewriter import longlong
def not_implemented(msg):
msg = '[ppc/vector_ext] %s\n' % msg
@@ -441,13 +443,14 @@
res, l0, l1, sizeloc = arglocs
size = sizeloc.value
if size == 1:
- self.vcmpequbx(res.value, l0.value, l1.value)
+ self.mc.vcmpequbx(res.value, l0.value, l1.value)
elif size == 2:
- self.vcmpequhx(res.value, l0.value, l1.value)
+ self.mc.vcmpequhx(res.value, l0.value, l1.value)
elif size == 4:
- self.vcmpequwx(res.value, l0.value, l1.value)
+ self.mc.vcmpequwx(res.value, l0.value, l1.value)
elif size == 8:
- self.vcmpequdx(res.value, l0.value, l1.value)
+ self.mc.vcmpequdx(res.value, l0.value, l1.value)
+ flush_vec_cc(self, regalloc, c.EQ, op.bytesize, res)
def emit_vec_int_ne(self, op, arglocs, regalloc):
res, l0, l1, sizeloc = arglocs
@@ -455,62 +458,55 @@
tmp = regalloc.get_scratch_reg().value
self.mc.vxor(tmp, tmp, tmp)
if size == 1:
- self.vcmpequb(res.value, l0.value, l1.value)
- self.vcmpequbx(res.value, res.value, tmp)
+ self.mc.vcmpequbx(res.value, l0.value, l1.value)
elif size == 2:
- self.vcmpequh(res.value, l0.value, l1.value)
- self.vcmpequhx(res.value, res.value, tmp)
+ self.mc.vcmpequhx(res.value, l0.value, l1.value)
elif size == 4:
- self.vcmpequw(res.value, l0.value, l1.value)
- self.vcmpequwx(res.value, res.value, tmp)
+ self.mc.vcmpequwx(res.value, l0.value, l1.value)
elif size == 8:
- self.vcmpequd(res.value, l0.value, l1.value)
- self.vcmpequdx(res.value, res.value, tmp)
+ self.mc.vcmpequdx(res.value, l0.value, l1.value)
+ self.mc.vnor(res.value, res.value, res.value)
+ flush_vec_cc(self, regalloc, c.NE, op.bytesize, res)
- #def genop_vec_cast_float_to_int(self, op, arglocs, regalloc):
- # self.mc.CVTPD2DQ(resloc, arglocs[0])
- #def genop_vec_cast_singlefloat_to_float(self, op, arglocs, regalloc):
- # self.mc.CVTPS2PD(resloc, arglocs[0])
+ def emit_vec_expand_f(self, op, arglocs, regalloc):
+ resloc, srcloc = arglocs
+ size = op.bytesize
+ res = resloc.value
+ if isinstance(srcloc, l.ConstFloatLoc):
+ # they are aligned!
+ assert size == 8
+ tloc = regalloc.rm.get_scratch_reg()
+ self.mc.load_imm(tloc, srcloc.value)
+ self.mc.lxvd2x(res, 0, tloc.value)
+ elif size == 8:
+ self.mc.vmr(res, srcloc.value, srcloc.value)
+ else:
+ not_implemented("[ppc/assembler] vec expand in this combination not
supported")
- #def genop_vec_expand_f(self, op, arglocs, regalloc):
- # srcloc, sizeloc = arglocs
- # size = sizeloc.value
- # if isinstance(srcloc, ConstFloatLoc):
- # # they are aligned!
- # self.mc.MOVAPD(resloc, srcloc)
- # elif size == 4:
- # # the register allocator forces src to be the same as resloc
- # # r = (s[0], s[0], r[0], r[0])
- # # since resloc == srcloc: r = (r[0], r[0], r[0], r[0])
- # self.mc.SHUFPS_xxi(resloc.value, srcloc.value, 0)
- # elif size == 8:
- # self.mc.MOVDDUP(resloc, srcloc)
- # else:
- # raise AssertionError("float of size %d not supported" % (size,))
-
- #def genop_vec_expand_i(self, op, arglocs, regalloc):
- # srcloc, sizeloc = arglocs
- # if not isinstance(srcloc, RegLoc):
- # self.mov(srcloc, X86_64_SCRATCH_REG)
- # srcloc = X86_64_SCRATCH_REG
- # assert not srcloc.is_xmm
- # size = sizeloc.value
- # if size == 1:
- # self.mc.PINSRB_xri(resloc.value, srcloc.value, 0)
- # self.mc.PSHUFB(resloc, heap(self.expand_byte_mask_addr))
- # elif size == 2:
- # self.mc.PINSRW_xri(resloc.value, srcloc.value, 0)
- # self.mc.PINSRW_xri(resloc.value, srcloc.value, 4)
- # self.mc.PSHUFLW_xxi(resloc.value, resloc.value, 0)
- # self.mc.PSHUFHW_xxi(resloc.value, resloc.value, 0)
- # elif size == 4:
- # self.mc.PINSRD_xri(resloc.value, srcloc.value, 0)
- # self.mc.PSHUFD_xxi(resloc.value, resloc.value, 0)
- # elif size == 8:
- # self.mc.PINSRQ_xri(resloc.value, srcloc.value, 0)
- # self.mc.PINSRQ_xri(resloc.value, srcloc.value, 1)
- # else:
- # raise AssertionError("cannot handle size %d (int expand)" %
(size,))
+ def emit_vec_expand_i(self, op, arglocs, regalloc):
+ not_implemented("[vec expand i]")
+ srcloc, sizeloc = arglocs
+ if not isinstance(srcloc, RegLoc):
+ self.mov(srcloc, X86_64_SCRATCH_REG)
+ srcloc = X86_64_SCRATCH_REG
+ assert not srcloc.is_xmm
+ size = sizeloc.value
+ if size == 1:
+ self.mc.PINSRB_xri(resloc.value, srcloc.value, 0)
+ self.mc.PSHUFB(resloc, heap(self.expand_byte_mask_addr))
+ elif size == 2:
+ self.mc.PINSRW_xri(resloc.value, srcloc.value, 0)
+ self.mc.PINSRW_xri(resloc.value, srcloc.value, 4)
+ self.mc.PSHUFLW_xxi(resloc.value, resloc.value, 0)
+ self.mc.PSHUFHW_xxi(resloc.value, resloc.value, 0)
+ elif size == 4:
+ self.mc.PINSRD_xri(resloc.value, srcloc.value, 0)
+ self.mc.PSHUFD_xxi(resloc.value, resloc.value, 0)
+ elif size == 8:
+ self.mc.PINSRQ_xri(resloc.value, srcloc.value, 0)
+ self.mc.PINSRQ_xri(resloc.value, srcloc.value, 1)
+ else:
+ raise AssertionError("cannot handle size %d (int expand)" %
(size,))
#def genop_vec_pack_i(self, op, arglocs, regalloc):
# resultloc, sourceloc, residxloc, srcidxloc, countloc, sizeloc =
arglocs
@@ -622,6 +618,12 @@
#genop_vec_unpack_f = genop_vec_pack_f
+ # needed as soon as PPC's support_singlefloat is implemented!
+ #def genop_vec_cast_float_to_int(self, op, arglocs, regalloc):
+ # self.mc.CVTPD2DQ(resloc, arglocs[0])
+ #def genop_vec_cast_singlefloat_to_float(self, op, arglocs, regalloc):
+ # self.mc.CVTPS2PD(resloc, arglocs[0])
+
class VectorRegalloc(object):
_mixin_ = True
@@ -789,28 +791,24 @@
#prepare_vec_unpack_f = prepare_vec_unpack_i
- #def prepare_vec_expand_f(self, op):
- # assert isinstance(op, VectorOp)
- # arg = op.getarg(0)
- # args = op.getarglist()
- # if arg.is_constant():
- # resloc = self.xrm.force_allocate_reg(op)
- # srcloc = self.xrm.expand_float(op.bytesize, arg)
- # else:
- # resloc = self.xrm.force_result_in_reg(op, arg, args)
- # srcloc = resloc
- # self.perform(op, [srcloc, imm(op.bytesize)], resloc)
+ def expand_float(self, size, box):
+ adr = self.assembler.datablockwrapper.malloc_aligned(16, 16)
+ fs = box.getfloatstorage()
+ rffi.cast(rffi.CArrayPtr(longlong.FLOATSTORAGE), adr)[0] = fs
+ rffi.cast(rffi.CArrayPtr(longlong.FLOATSTORAGE), adr)[1] = fs
+ return l.ConstFloatLoc(adr)
- #def prepare_vec_expand_i(self, op):
- # assert isinstance(op, VectorOp)
- # arg = op.getarg(0)
- # args = op.getarglist()
- # if arg.is_constant():
- # srcloc = self.rm.convert_to_imm(arg)
- # else:
- # srcloc = self.make_sure_var_in_reg(arg, args)
- # resloc = self.xrm.force_allocate_reg(op, args)
- # self.perform(op, [srcloc, imm(op.bytesize)], resloc)
+ def prepare_vec_expand_f(self, op):
+ arg = op.getarg(0)
+ if arg.is_constant():
+ l0 = self.expand_float(op.bytesize, arg)
+ res = self.force_allocate_vector_reg(op)
+ else:
+ l0 = self.ensure_vector_reg(arg)
+ res = self.force_allocate_vector_reg(op)
+ return [res, l0]
+
+ prepare_vec_expand_i = prepare_vec_expand_f
def prepare_vec_int_is_true(self, op):
arg = op.getarg(0)
@@ -819,12 +817,12 @@
resloc = self.force_allocate_vector_reg(op)
return [resloc, argloc, imm(arg.bytesize)]
- #def _prepare_vec(self, op):
- # # pseudo instruction, needed to create a new variable
- # self.xrm.force_allocate_reg(op)
+ def _prepare_vec(self, op):
+ # pseudo instruction, needed to allocate a register for a new variable
+ return [self.force_allocate_vector_reg(op)]
- #prepare_vec_i = _prepare_vec
- #prepare_vec_f = _prepare_vec
+ prepare_vec_i = _prepare_vec
+ prepare_vec_f = _prepare_vec
def prepare_vec_cast_float_to_int(self, op):
l0 = self.ensure_vector_reg(op.getarg(0))
@@ -832,18 +830,15 @@
return [res, l0]
prepare_vec_cast_int_to_float = prepare_vec_cast_float_to_int
- #prepare_vec_cast_float_to_singlefloat = prepare_vec_cast_float_to_int
- #prepare_vec_cast_singlefloat_to_float = prepare_vec_cast_float_to_int
- #def prepare_vec_guard_true(self, op):
- # arg = op.getarg(0)
- # loc = self.loc(arg)
- # self.assembler.guard_vector(op, self.loc(arg), True)
- # self.perform_guard(op, [], None)
+ def _prepare_guard(self, box):
+ if self.assembler.guard_success_cc == c.cond_none:
+ not_implemented("[ppc/regalloc] guard")
+ self.assembler.guard_success_cc = c.NE
- #def prepare_vec_guard_false(self, op):
- # arg = op.getarg(0)
- # loc = self.loc(arg)
- # self.assembler.guard_vector(op, self.loc(arg), False)
- # self.perform_guard(op, [], None)
+ def prepare_vec_guard_true(self, op):
+ self._prepare_guard(op.getarg(0))
+ def prepare_vec_guard_false(self, op):
+ self._prepare_guard(op.getarg(0))
+
diff --git a/rpython/jit/metainterp/test/test_vector.py
b/rpython/jit/metainterp/test/test_vector.py
--- a/rpython/jit/metainterp/test/test_vector.py
+++ b/rpython/jit/metainterp/test/test_vector.py
@@ -265,6 +265,11 @@
test_vec_xor_short = \
vec_int_arith(lambda a,b: intmask(a)^intmask(b), rffi.SHORT)
+ test_vec_int_eq = \
+ vec_int_arith(lambda a,b: a == b, rffi.SIGNED)
+ test_vec_int_ne = \
+ vec_int_arith(lambda a,b: a != b, rffi.SIGNED)
+
@py.test.mark.parametrize('i',[1,2,3,4,9])
def test_vec_register_too_small_vector(self, i):
myjitdriver = JitDriver(greens = [],
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit