Author: Richard Plangger <planri...@gmail.com>
Branch: ppc-vsx-support
Changeset: r85434:780c12936a64
Date: 2016-06-28 16:24 +0200
http://bitbucket.org/pypy/pypy/changeset/780c12936a64/

Log:    finish the impl. of vec_float_eq/ne + test

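In short: vec_float_eq already lowers to xvcmpeqspx/xvcmpeqdpx, which leave an all-ones or all-zeros mask in each lane; this changeset derives vec_float_ne from the same compare by complementing that mask with vnor, and adds flush_vec_cc to either keep the result in the condition code or materialize an integer 0/1 per lane. A minimal scalar model of the lowering (lane values as plain ints; eq_mask/ne_mask are illustrative names, not backend functions):

    MASK64 = (1 << 64) - 1

    def eq_mask(a, b):
        # xvcmpeqdpx: all-ones lane when equal, all-zeros otherwise
        return MASK64 if a == b else 0

    def ne_mask(a, b):
        # vec_float_ne = the same compare followed by vnor(res, res, res),
        # i.e. a bitwise complement of the equality mask
        return ~eq_mask(a, b) & MASK64

    assert eq_mask(1.5, 1.5) == MASK64 and ne_mask(1.5, 1.5) == 0
    assert eq_mask(1.0, 2.0) == 0 and ne_mask(1.0, 2.0) == MASK64
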
diff --git a/rpython/jit/backend/ppc/codebuilder.py b/rpython/jit/backend/ppc/codebuilder.py
--- a/rpython/jit/backend/ppc/codebuilder.py
+++ b/rpython/jit/backend/ppc/codebuilder.py
@@ -683,9 +683,12 @@
     vor = VX(4, XO8=1156)
     veqv = VX(4, XO8=1668)
     vxor = VX(4, XO8=1220)
+    vnor = VX(4, XO8=1284)
 
     # vector move register is alias to vector or
     vmr = vor
+    # complement is equivalent to vnor
+    vnot = vnor
 
     # shift, perm and select
     lvsl = XV(31, XO1=6)
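
The vnot alias above is sound because NOR of a value with itself is its complement: NOR(x, x) = NOT(x OR x) = NOT x. A quick check on 32-bit patterns (illustrative width, outside the backend):

    MASK32 = 0xffffffff

    def vnor32(a, b):
        # vnor: bitwise NOT of the OR, truncated to the lane width
        return ~(a | b) & MASK32

    x = 0xdeadbeef
    assert vnor32(x, x) == (~x & MASK32)
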
diff --git a/rpython/jit/backend/ppc/runner.py b/rpython/jit/backend/ppc/runner.py
--- a/rpython/jit/backend/ppc/runner.py
+++ b/rpython/jit/backend/ppc/runner.py
@@ -52,6 +52,7 @@
             self.vector_ext = AltiVectorExt()
             self.vector_extension = True
             # ??? self.vector_horizontal_operations = True
+            self.assembler.setup_once_vector()
 
     @rgc.no_release_gil
     def finish_once(self):
diff --git a/rpython/jit/backend/ppc/vector_ext.py b/rpython/jit/backend/ppc/vector_ext.py
--- a/rpython/jit/backend/ppc/vector_ext.py
+++ b/rpython/jit/backend/ppc/vector_ext.py
@@ -16,6 +16,8 @@
 from rpython.jit.backend.ppc.arch import PARAM_SAVE_AREA_OFFSET
 import rpython.jit.backend.ppc.register as r
 import rpython.jit.backend.ppc.condition as c
+from rpython.jit.backend.llsupport.asmmemmgr import MachineDataBlockWrapper
+from rpython.rtyper.lltypesystem import lltype, rffi
 
 def not_implemented(msg):
     msg = '[ppc/vector_ext] %s\n' % msg
@@ -23,12 +25,54 @@
         llop.debug_print(lltype.Void, msg)
     raise NotImplementedError(msg)
 
+def flush_vec_cc(asm, regalloc, condition, size, result_loc):
+    # After emitting an instruction that leaves a boolean result in
+    # a condition code (cc), call this.  In the common case, result_loc
+    # will be set to SPP by the regalloc, which in this case means
+    # "propagate it between this operation and the next guard by keeping
+    # it in the cc".  In the uncommon case, result_loc is another
+    # register, and we emit a load from the cc into this register.
+    assert asm.guard_success_cc == c.cond_none
+    if result_loc is r.SPP:
+        asm.guard_success_cc = condition
+    else:
+        resval = result_loc.value
+        # either doubleword integer 1 (2x) or word integer 1 (4x)
+        ones = regalloc.ivrm.get_scratch_reg().value
+        zeros = regalloc.ivrm.get_scratch_reg().value
+        asm.mc.vxor(zeros, zeros, zeros)
+        if size == 4:
+            asm.mc.vspltisw(ones, 1)
+        else:
+            assert size == 8
+            tloc = regalloc.rm.get_scratch_reg()
+            asm.mc.load_imm(tloc, asm.VEC_DOUBLE_WORD_ONES)
+            asm.mc.lvx(ones, 0, tloc.value)
+        asm.mc.vsel(resval, zeros, ones, resval)
+
 class AltiVectorExt(VectorExt):
     pass
 
 class VectorAssembler(object):
     _mixin_ = True
 
+    VEC_DOUBLE_WORD_ONES = 0
+
+    def setup_once_vector(self):
+        if IS_BIG_ENDIAN:
+            # 2x 64 bit signed integer(1) BE
+            data = (b'\x00' * 7 + b'\x01') * 2
+        else:
+            # 2x 64 bit signed integer(1) LE
+            data = (b'\x01' + b'\x00' * 7) * 2
+        datablockwrapper = MachineDataBlockWrapper(self.cpu.asmmemmgr, [])
+        mem = datablockwrapper.malloc_aligned(len(data), alignment=16)
+        datablockwrapper.done()
+        addr = rffi.cast(rffi.CArrayPtr(lltype.Char), mem)
+        for i in range(len(data)):
+            addr[i] = data[i]
+        self.VEC_DOUBLE_WORD_ONES = mem
+
     def emit_vec_load_f(self, op, arglocs, regalloc):
         resloc, baseloc, indexloc, size_loc, ofs, integer_loc, aligned_loc = arglocs
         #src_addr = addr_add(baseloc, ofs_loc, ofs.value, 0)
@@ -360,6 +404,7 @@
         else:
             notimplemented("[ppc/assembler] float == for size %d" % size)
         self.mc.lvx(resloc.value, off, r.SP.value)
+        flush_vec_cc(self, regalloc, c.EQ, op.bytesize, resloc)
 
     def emit_vec_float_ne(self, op, arglocs, regalloc):
         resloc, loc1, loc2, sizeloc = arglocs
@@ -371,15 +416,16 @@
         self.mc.load_imm(offloc, PARAM_SAVE_AREA_OFFSET)
         if size == 4:
             self.mc.xvcmpeqspx(tmp, loc1.value, loc2.value)
-            self.mc.xxlandc(tmp, tmp, tmp) # negate
             self.mc.stxvw4x(tmp, off, r.SP.value)
         elif size == 8:
             self.mc.xvcmpeqdpx(tmp, loc1.value, loc2.value)
-            self.mc.xxlandc(tmp, tmp, tmp) # negate
             self.mc.stxvd2x(tmp, off, r.SP.value)
         else:
             notimplemented("[ppc/assembler] float == for size %d" % size)
-        self.mc.lvx(resloc.value, off, r.SP.value)
+        res = resloc.value
+        self.mc.lvx(res, off, r.SP.value)
+        self.mc.vnor(res, res, res) # complement
+        flush_vec_cc(self, regalloc, c.NE, op.bytesize, resloc)
 
     def emit_vec_cast_int_to_float(self, op, arglocs, regalloc):
         res, l0 = arglocs
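
A sketch of the vsel step in flush_vec_cc, assuming the comparison left an all-ones or all-zeros mask per lane: vsel is a bitwise select, so set mask bits pick from the ones vector and clear bits from the zeros vector, turning each lane into integer 1 or 0 (for 8-byte lanes the ones vector is loaded from the 16-byte constant built in setup_once_vector). The model below uses plain ints and an illustrative vsel helper, not the emitted machine code:

    def vsel(a, b, mask, width=64):
        # bitwise select: where a mask bit is set take b, otherwise a
        m = (1 << width) - 1
        return (a & ~mask & m) | (b & mask & m)

    ALL_ONES = (1 << 64) - 1
    assert vsel(0, 1, ALL_ONES) == 1   # lane compared true  -> integer 1
    assert vsel(0, 1, 0) == 0          # lane compared false -> integer 0
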
diff --git a/rpython/jit/metainterp/test/test_vector.py b/rpython/jit/metainterp/test/test_vector.py
--- a/rpython/jit/metainterp/test/test_vector.py
+++ b/rpython/jit/metainterp/test/test_vector.py
@@ -185,6 +185,8 @@
 
     test_vec_float_cmp_eq = \
         vec_float_binary(lambda a,b: a == b, rffi.DOUBLE)
+    test_vec_float_cmp_ne = \
+        vec_float_binary(lambda a,b: a != b, rffi.DOUBLE)
 
     def _vector_simple_int(self, func, type, data):
         func = always_inline(func)
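
As a plain-Python reference for what the new test exercises (illustrative, not the test harness itself): the vectorized loop must compute an elementwise !=, yielding 1 where lanes differ and 0 where they match.

    def cmp_ne(xs, ys):
        return [1 if a != b else 0 for a, b in zip(xs, ys)]

    assert cmp_ne([1.0, 2.0, 3.0], [1.0, 9.0, 3.0]) == [0, 1, 0]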