gc_load) is done

plan_rich Mon, 27 Jun 2016 07:04:49 -0700

Author: Richard Plangger <[email protected]>
Branch: ppc-vsx-support
Changeset: r85398:3a5a728d551e
Date: 2016-06-27 15:59 +0200
http://bitbucket.org/pypy/pypy/changeset/3a5a728d551e/


Log:    refactoring of vec_load/vec_store: the same simplification (as
        e.g. raw_load -> gc_load) is now done; arch dependent code is now
        moved to the backend

diff --git a/rpython/jit/backend/llsupport/llmodel.py 
b/rpython/jit/backend/llsupport/llmodel.py
--- a/rpython/jit/backend/llsupport/llmodel.py
+++ b/rpython/jit/backend/llsupport/llmodel.py
@@ -35,6 +35,7 @@
     # can an ISA instruction handle a factor to the offset?
     load_supported_factors = (1,)
 
+    vector_ext = None
     vector_extension = False
     vector_register_size = 0 # in bytes
     vector_horizontal_operations = False
diff --git a/rpython/jit/backend/llsupport/rewrite.py 
b/rpython/jit/backend/llsupport/rewrite.py
--- a/rpython/jit/backend/llsupport/rewrite.py
+++ b/rpython/jit/backend/llsupport/rewrite.py
@@ -156,32 +156,12 @@
         index_box = op.getarg(1)
         self.emit_gc_load_or_indexed(op, ptr_box, index_box, itemsize, 
itemsize, ofs, sign)
 
-    def handle_rawload(self, op):
-        itemsize, ofs, sign = unpack_arraydescr(op.getdescr())
-        ptr_box = op.getarg(0)
-        index_box = op.getarg(1)
-        self.emit_gc_load_or_indexed(op, ptr_box, index_box, itemsize, 1, ofs, 
sign)
-
     def _emit_mul_if_factor_offset_not_supported(self, index_box,
                                                  factor, offset):
-        # Returns (factor, offset, index_box) where index_box is either
-        # a non-constant BoxInt or None.
-        if isinstance(index_box, ConstInt):
-            return 1, index_box.value * factor + offset, None
-        else:
-            if factor != 1 and factor not in self.cpu.load_supported_factors:
-                # the factor is supported by the cpu
-                # x & (x - 1) == 0 is a quick test for power of 2
-                assert factor > 0
-                if (factor & (factor - 1)) == 0:
-                    index_box = ResOperation(rop.INT_LSHIFT,
-                            [index_box, ConstInt(highest_bit(factor))])
-                else:
-                    index_box = ResOperation(rop.INT_MUL,
-                            [index_box, ConstInt(factor)])
-                self.emit_op(index_box)
-                factor = 1
-            return factor, offset, index_box
+        factor, offset, scaled_box = cpu_simplify_scale(self.cpu, index_box, factor, offset)
+        if scaled_box is not None and scaled_box is not index_box:  # emit only a newly built scaling op
+            self.emit_op(scaled_box)
+        return factor, offset, scaled_box  # same (factor, offset, index_box) contract as before
 
     def emit_gc_load_or_indexed(self, op, ptr_box, index_box, itemsize,
                                 factor, offset, sign, type='i'):
@@ -985,3 +965,23 @@
             self._newops.append(load_op)
             self.gcrefs_recently_loaded[index] = load_op
         return load_op
+
+def cpu_simplify_scale(cpu, index_box, factor, offset):
+    # Returns (factor, offset, index_box) where index_box is None (constant
+    # index folded into offset), unchanged, or a new not-yet-emitted op.
+    if isinstance(index_box, ConstInt):
+        return 1, index_box.value * factor + offset, None
+    else:
+        if factor != 1 and factor not in cpu.load_supported_factors:
+            # the factor is not supported by the cpu
+            # x & (x - 1) == 0 is a quick test for power of 2
+            assert factor > 0
+            if (factor & (factor - 1)) == 0:
+                index_box = ResOperation(rop.INT_LSHIFT,
+                        [index_box, ConstInt(highest_bit(factor))])
+            else:
+                index_box = ResOperation(rop.INT_MUL,
+                        [index_box, ConstInt(factor)])
+            factor = 1
+        return factor, offset, index_box
+
diff --git a/rpython/jit/backend/llsupport/vector_ext.py 
b/rpython/jit/backend/llsupport/vector_ext.py
new file mode 100644
--- /dev/null
+++ b/rpython/jit/backend/llsupport/vector_ext.py
@@ -0,0 +1,235 @@
+from rpython.jit.backend.llsupport.descr import (unpack_arraydescr,
+        unpack_fielddescr, unpack_interiorfielddescr)
+from rpython.rlib.objectmodel import specialize, always_inline
+from rpython.jit.metainterp.history import (VECTOR, FLOAT, INT)
+from rpython.jit.metainterp.resoperation import rop
+
+class TypeRestrict(object):  # constraint on one vector argument; ANY_* means unconstrained
+    ANY_TYPE = '\x00'
+    ANY_SIZE = -1
+    ANY_SIGN = -1
+    ANY_COUNT = -1
+    SIGNED = 1
+    UNSIGNED = 0
+
+    def __init__(self,
+                 type=ANY_TYPE,
+                 bytesize=ANY_SIZE,
+                 count=ANY_COUNT,
+                 sign=ANY_SIGN):
+        self.type = type
+        self.bytesize = bytesize
+        self.sign = sign
+        self.count = count
+
+    @always_inline
+    def any_size(self):
+        return self.bytesize == TypeRestrict.ANY_SIZE
+
+    @always_inline
+    def any_count(self):
+        return self.count == TypeRestrict.ANY_COUNT
+
+    def check(self, value):  # reports every violation through failnbail_transformation
+        vecinfo = forwarded_vecinfo(value)
+        assert vecinfo.datatype != '\x00'
+        if self.type != TypeRestrict.ANY_TYPE:
+            if self.type != vecinfo.datatype:
+                msg = "type mismatch %s != %s" % \
+                        (self.type, vecinfo.datatype)
+                failnbail_transformation(msg)
+        assert vecinfo.bytesize > 0
+        if not self.any_size():
+            if self.bytesize != vecinfo.bytesize:
+                msg = "bytesize mismatch %s != %s" % \
+                        (self.bytesize, vecinfo.bytesize)
+                failnbail_transformation(msg)
+        assert vecinfo.count > 0
+        if self.count != TypeRestrict.ANY_COUNT:
+            if vecinfo.count < self.count:
+                msg = "count mismatch %s < %s" % \
+                        (vecinfo.count, self.count)
+                failnbail_transformation(msg)
+        if self.sign != TypeRestrict.ANY_SIGN:
+            if bool(self.sign) == vecinfo.sign:  # NOTE(review): fails when the signs are equal -- looks inverted, confirm
+                msg = "sign mismatch %s < %s" % \
+                        (self.sign, vecinfo.sign)
+                failnbail_transformation(msg)
+
+    def max_input_count(self, count):
+        """ How many elements are allowed: the restricted count if set, else `count` """
+        if self.count != TypeRestrict.ANY_COUNT:
+            return self.count
+        return count
+
+class OpRestrict(object):  # restrictions for one vector operation, indexed by argument position
+    def __init__(self, argument_restris):
+        self.argument_restrictions = argument_restris
+
+    def check_operation(self, state, pack, op):
+        pass  # no-op here; subclasses override to validate or rewrite `op`
+
+    def crop_vector(self, op, newsize, size):
+        return newsize, size  # sizes are passed through unchanged
+
+    def must_crop_vector(self, op, index):
+        restrict = self.argument_restrictions[index]
+        vecinfo = forwarded_vecinfo(op.getarg(index))
+        size = vecinfo.bytesize
+        newsize = self.crop_to_size(op, index)
+        return not restrict.any_size() and newsize != size  # crop only if a pinned size differs from the argument's
+
+    @always_inline
+    def crop_to_size(self, op, index):
+        restrict = self.argument_restrictions[index]
+        return restrict.bytesize
+
+    def opcount_filling_vector_register(self, op, vec_reg_size):
+        """ How many operations of that kind can one execute
+            with a machine instruction of register size X?
+        """
+        if op.is_typecast():
+            if op.casts_down():
+                size = op.cast_input_bytesize(vec_reg_size)
+                return size // op.cast_from_bytesize()
+            else:
+                return vec_reg_size // op.cast_to_bytesize()
+        vecinfo = forwarded_vecinfo(op)
+        return  vec_reg_size // vecinfo.bytesize
+
+class GuardRestrict(OpRestrict):
+    def opcount_filling_vector_register(self, op, vec_reg_size):
+        arg = op.getarg(0)  # sized by the guard's first argument, not by the guard op itself
+        vecinfo = forwarded_vecinfo(arg)
+        return vec_reg_size // vecinfo.bytesize
+
+class LoadRestrict(OpRestrict):
+    def check_operation(self, state, pack, op):  # pre-scales the index of array loads when the cpu cannot
+        opnum = op.getopnum()
+        if rop.is_getarrayitem(opnum) or \
+             opnum in (rop.GETARRAYITEM_RAW_I, rop.GETARRAYITEM_RAW_F):
+            itemsize, ofs, sign = unpack_arraydescr(op.getdescr())
+            index_box = op.getarg(1)
+            _, _, changed = cpu_simplify_scale(state.cpu, index_box, itemsize, ofs)
+            if changed is not None and changed is not index_box:  # None: constant index, nothing to emit
+                state.oplist.append(changed)
+                op.setarg(1, changed)
+
+    def opcount_filling_vector_register(self, op, vec_reg_size):
+        assert rop.is_primitive_load(op.opnum)
+        descr = op.getdescr()
+        return vec_reg_size // descr.get_item_size_in_bytes()
+
+class StoreRestrict(OpRestrict):
+    def __init__(self, argument_restris):
+        self.argument_restrictions = argument_restris
+
+    def check_operation(self, state, pack, op):  # pre-scales the index of array stores when the cpu cannot
+        opnum = op.getopnum()
+        if opnum in (rop.SETARRAYITEM_GC, rop.SETARRAYITEM_RAW):
+            itemsize, basesize, _ = unpack_arraydescr(op.getdescr())
+            index_box = op.getarg(1)
+            _, _, changed = cpu_simplify_scale(state.cpu, index_box, itemsize, basesize)
+            if changed is not None and changed is not index_box:  # None: constant index, nothing to emit
+                state.oplist.append(changed)
+                op.setarg(1, changed)
+
+    def must_crop_vector(self, op, index):
+        vecinfo = forwarded_vecinfo(op.getarg(index))
+        bytesize = vecinfo.bytesize
+        return self.crop_to_size(op, index) != bytesize
+
+    @always_inline
+    def crop_to_size(self, op, index):
+        # there is only one parameter that needs to be transformed!
+        descr = op.getdescr()
+        return descr.get_item_size_in_bytes()
+
+    def opcount_filling_vector_register(self, op, vec_reg_size):
+        assert rop.is_primitive_store(op.opnum)
+        descr = op.getdescr()
+        return vec_reg_size // descr.get_item_size_in_bytes()
+
+class OpMatchSizeTypeFirst(OpRestrict):
+    def check_operation(self, state, pack, op):  # all non-constant args must share bytesize and datatype
+        i = 0
+        infos = [forwarded_vecinfo(o) for o in op.getarglist()]
+        arg0 = op.getarg(i)
+        while arg0.is_constant() and i + 1 < op.numargs():  # stop at the last arg instead of indexing past it
+            i += 1
+            arg0 = op.getarg(i)
+        vecinfo = forwarded_vecinfo(arg0)
+        bytesize = vecinfo.bytesize
+        datatype = vecinfo.datatype
+
+        for arg in op.getarglist():
+            if arg.is_constant():
+                continue
+            curvecinfo = forwarded_vecinfo(arg)
+            if curvecinfo.bytesize != bytesize:
+                raise NotAVectorizeableLoop()
+            if curvecinfo.datatype != datatype:
+                raise NotAVectorizeableLoop()
+
+TR_ANY = TypeRestrict()
+TR_ANY_FLOAT = TypeRestrict(FLOAT)
+TR_ANY_INTEGER = TypeRestrict(INT)
+TR_FLOAT_2 = TypeRestrict(FLOAT, 4, 2)
+TR_DOUBLE_2 = TypeRestrict(FLOAT, 8, 2)
+TR_INT32_2 = TypeRestrict(INT, 4, 2)
+
+OR_MSTF_I = OpMatchSizeTypeFirst([TR_ANY_INTEGER, TR_ANY_INTEGER])
+OR_MSTF_F = OpMatchSizeTypeFirst([TR_ANY_FLOAT, TR_ANY_FLOAT])
+STORE_RESTRICT = StoreRestrict([None, None, TR_ANY])
+LOAD_RESTRICT = LoadRestrict([])
+GUARD_RESTRICT = GuardRestrict([TR_ANY_INTEGER])
+
+
+class VectorExt(object):  # maps vector opcodes to their OpRestrict; backends subclass this
+
+    # note that the following definition is x86 arch specific
+    TR_MAPPING = {
+        rop.VEC_INT_ADD:            OR_MSTF_I,
+        rop.VEC_INT_SUB:            OR_MSTF_I,
+        rop.VEC_INT_MUL:            OR_MSTF_I,
+        rop.VEC_INT_AND:            OR_MSTF_I,
+        rop.VEC_INT_OR:             OR_MSTF_I,
+        rop.VEC_INT_XOR:            OR_MSTF_I,
+        rop.VEC_INT_EQ:             OR_MSTF_I,
+        rop.VEC_INT_NE:             OR_MSTF_I,
+
+        rop.VEC_FLOAT_ADD:          OR_MSTF_F,
+        rop.VEC_FLOAT_SUB:          OR_MSTF_F,
+        rop.VEC_FLOAT_MUL:          OR_MSTF_F,
+        rop.VEC_FLOAT_TRUEDIV:      OR_MSTF_F,
+        rop.VEC_FLOAT_ABS:          OpRestrict([TR_ANY_FLOAT]),
+        rop.VEC_FLOAT_NEG:          OpRestrict([TR_ANY_FLOAT]),
+
+        rop.VEC_STORE:              STORE_RESTRICT,
+
+        rop.VEC_LOAD_I:             LOAD_RESTRICT,
+        rop.VEC_LOAD_F:             LOAD_RESTRICT,
+
+        rop.VEC_GUARD_TRUE:             GUARD_RESTRICT,
+        rop.VEC_GUARD_FALSE:            GUARD_RESTRICT,
+
+        ## irregular
+        rop.VEC_INT_SIGNEXT:        OpRestrict([TR_ANY_INTEGER]),
+
+        rop.VEC_CAST_FLOAT_TO_SINGLEFLOAT:  OpRestrict([TR_DOUBLE_2]),
+        # weird but the trace will store single floats in int boxes
+        rop.VEC_CAST_SINGLEFLOAT_TO_FLOAT:  OpRestrict([TR_INT32_2]),
+        rop.VEC_CAST_FLOAT_TO_INT:          OpRestrict([TR_DOUBLE_2]),
+        rop.VEC_CAST_INT_TO_FLOAT:          OpRestrict([TR_INT32_2]),
+
+        rop.VEC_FLOAT_EQ:           OpRestrict([TR_ANY_FLOAT,TR_ANY_FLOAT]),
+        rop.VEC_FLOAT_NE:           OpRestrict([TR_ANY_FLOAT,TR_ANY_FLOAT]),
+        rop.VEC_INT_IS_TRUE:        
OpRestrict([TR_ANY_INTEGER,TR_ANY_INTEGER]),
+    }
+
+    def get_operation_restriction(self, op):
+        res = self.TR_MAPPING.get(op.vector, None)  # keyed by the op's vector opcode
+        if not res:
+            failnbail_transformation("could not get OpRestrict for " + str(op))  # presumably raises -- confirm
+        return res
+
diff --git a/rpython/jit/backend/ppc/codebuilder.py 
b/rpython/jit/backend/ppc/codebuilder.py
--- a/rpython/jit/backend/ppc/codebuilder.py
+++ b/rpython/jit/backend/ppc/codebuilder.py
@@ -62,8 +62,9 @@
 XFX = Form("CRM", "rS", "XO1")
 XLL = Form("LL", "XO1")
 XX1 = Form("fvrT", "rA", "rB", "XO1")
-XX2 = Form("fvrT", "fvrB", "XO5")
+XX2 = Form("fvrT", "fvrB", "XO6")
 XX3 = Form("fvrT", "fvrA", "fvrB", "XO9")
+XX3_2 = Form("fvrT", "fvrA", "fvrB", "OE", "XO11")
 XV = Form("ivrT", "rA", "rB", "XO1")
 VX = Form("ivrT", "ivrA", "ivrB", "XO8")
 VXI = Form("ivrT", "SIM", "XO8")
@@ -611,14 +612,22 @@
     # div
     xvdivdp = XX3(60, XO9=102)
     xvdivsp = XX3(60, XO9=88)
+    # cmp
+    xvcmpeqdp = XX3_2(60, XO11=99, OE=0)
+    xvcmpeqdpx = XX3_2(60, XO11=99, OE=1)
+    xvcmpeqsp = XX3_2(60, XO11=67, OE=0)
+    xvcmpeqspx = XX3_2(60, XO11=67, OE=1)
+
+    # logical and and complement
+    xxlandc = XX3(60, XO9=138)
 
     # neg
-    xvnegdp = XX2(60, XO5=505)
-    xvabssp = XX2(60, XO5=441)
+    xvnegdp = XX2(60, XO6=505)
+    xvnegsp = XX2(60, XO6=441)
 
     # abs
-    xvabsdp = XX2(60, XO5=473)
-    xvabssp = XX2(60, XO5=409)
+    xvabsdp = XX2(60, XO6=473)
+    xvabssp = XX2(60, XO6=409)
 
     # INTEGER
     # -------
diff --git a/rpython/jit/backend/ppc/ppc_field.py 
b/rpython/jit/backend/ppc/ppc_field.py
--- a/rpython/jit/backend/ppc/ppc_field.py
+++ b/rpython/jit/backend/ppc/ppc_field.py
@@ -64,6 +64,7 @@
     "XO8":    (21, 31),
     "XO9":    (21, 28),
     "XO10":   (26, 31),
+    "XO11":   (22, 28),
     "LL":     ( 9, 10),
     "SIM":    (11, 15),
 }
diff --git a/rpython/jit/backend/ppc/runner.py 
b/rpython/jit/backend/ppc/runner.py
--- a/rpython/jit/backend/ppc/runner.py
+++ b/rpython/jit/backend/ppc/runner.py
@@ -3,6 +3,7 @@
 from rpython.rlib import rgc
 from rpython.rlib.jit_hooks import LOOP_RUN_CONTAINER
 from rpython.jit.backend.llsupport.llmodel import AbstractLLCPU
+from rpython.jit.backend.ppc.vector_ext import AltiVectorExt
 from rpython.jit.backend.ppc.ppc_assembler import AssemblerPPC
 from rpython.jit.backend.ppc.arch import WORD
 from rpython.jit.backend.ppc.codebuilder import PPCBuilder
@@ -11,6 +12,7 @@
 
 class PPC_CPU(AbstractLLCPU):
 
+    vector_ext = None
     vector_extension = False # may be set to true in setup
     vector_register_size = 16
     vector_horizontal_operations = False
@@ -47,6 +49,7 @@
     def setup_once(self):
         self.assembler.setup_once()
         if detect_vsx():
+            self.vector_ext = AltiVectorExt()
             self.vector_extension = True
             # ??? self.vector_horizontal_operations = True
 
diff --git a/rpython/jit/backend/ppc/vector_ext.py 
b/rpython/jit/backend/ppc/vector_ext.py
--- a/rpython/jit/backend/ppc/vector_ext.py
+++ b/rpython/jit/backend/ppc/vector_ext.py
@@ -12,6 +12,7 @@
 from rpython.rtyper.lltypesystem import lltype
 from rpython.jit.backend.ppc.locations import imm
 from rpython.jit.backend.ppc.arch import IS_BIG_ENDIAN
+from rpython.jit.backend.llsupport.vector_ext import VectorExt
 
 def not_implemented(msg):
     msg = '[ppc/vector_ext] %s\n' % msg
@@ -19,6 +20,9 @@
         llop.debug_print(lltype.Void, msg)
     raise NotImplementedError(msg)
 
+class AltiVectorExt(VectorExt):  # PPC extension; currently inherits TR_MAPPING unchanged
+    pass
+
 class VectorAssembler(object):
     _mixin_ = True
 
@@ -348,22 +352,20 @@
     #    # entries before) become ones
     #    self.mc.PCMPEQ(loc, temp, sizeloc.value)
 
-    #def genop_vec_float_eq(self, op, arglocs, resloc):
-    #    _, rhsloc, sizeloc = arglocs
-    #    size = sizeloc.value
-    #    if size == 4:
-    #        self.mc.CMPPS_xxi(resloc.value, rhsloc.value, 0) # 0 means equal
-    #    else:
-    #        self.mc.CMPPD_xxi(resloc.value, rhsloc.value, 0)
+    def emit_vec_float_eq(self, op, arglocs, resloc):
+        resloc, loc1, loc2, sizeloc = arglocs  # the result location comes from arglocs, shadowing the parameter
+        size = sizeloc.value
+        if size == 4:
+            self.mc.xvcmpeqspx(resloc.value, loc1.value, loc2.value)
+        elif size == 8:
+            self.mc.xvcmpeqdpx(resloc.value, loc1.value, loc2.value)
+        else:
+            not_implemented("[ppc/assembler] float == for size %d" % size)  # module helper; `notimplemented` was undefined
 
-    #def genop_vec_float_ne(self, op, arglocs, resloc):
-    #    _, rhsloc, sizeloc = arglocs
-    #    size = sizeloc.value
-    #    # b(100) == 1 << 2 means not equal
-    #    if size == 4:
-    #        self.mc.CMPPS_xxi(resloc.value, rhsloc.value, 1 << 2)
-    #    else:
-    #        self.mc.CMPPD_xxi(resloc.value, rhsloc.value, 1 << 2)
+    def emit_vec_float_ne(self, op, arglocs, resloc):
+        self.emit_vec_float_eq(op, arglocs, resloc)  # compute the equality mask first
+        resloc, loc1, loc2, sizeloc = arglocs
+        self.mc.xxlandc(resloc.value, resloc.value, resloc.value)  # NOTE(review): "and with complement" of a register with itself looks like all zeros, not the inverted mask -- confirm
 
     #def genop_vec_int_eq(self, op, arglocs, resloc):
     #    _, rhsloc, sizeloc = arglocs
@@ -629,8 +631,14 @@
     prepare_vec_int_and = prepare_vec_arith
     prepare_vec_int_or = prepare_vec_arith
     prepare_vec_int_xor = prepare_vec_arith
+
+    prepare_vec_float_eq = prepare_vec_arith
+    prepare_vec_float_ne = prepare_vec_float_eq
+    prepare_vec_int_eq = prepare_vec_float_eq
+    prepare_vec_int_ne = prepare_vec_float_eq
     del prepare_vec_arith
 
+
     def _prepare_vec_store(self, op):
         descr = op.getdescr()
         assert isinstance(descr, ArrayDescr)
@@ -661,8 +669,6 @@
         resloc = self.force_allocate_vector_reg(op)
         return [resloc, loc0]
 
-
-
     def prepare_vec_arith_unary(self, op):
         a0 = op.getarg(0)
         loc0 = self.ensure_vector_reg(a0)
@@ -674,19 +680,6 @@
     prepare_vec_float_abs = prepare_vec_arith_unary
     del prepare_vec_arith_unary
 
-    #def prepare_vec_float_eq(self, op):
-    #    assert isinstance(op, VectorOp)
-    #    lhs = op.getarg(0)
-    #    assert isinstance(lhs, VectorOp)
-    #    args = op.getarglist()
-    #    rhsloc = self.make_sure_var_in_reg(op.getarg(1), args)
-    #    lhsloc = self.xrm.force_result_in_reg(op, op.getarg(0), args)
-    #    self.perform(op, [lhsloc, rhsloc, imm(lhs.bytesize)], lhsloc)
-
-    #prepare_vec_float_ne = prepare_vec_float_eq
-    #prepare_vec_int_eq = prepare_vec_float_eq
-    #prepare_vec_int_ne = prepare_vec_float_eq
-
     #def prepare_vec_pack_i(self, op):
     #    # new_res = vec_pack_i(res, src, index, count)
     #    assert isinstance(op, VectorOp)
diff --git a/rpython/jit/metainterp/executor.py 
b/rpython/jit/metainterp/executor.py
--- a/rpython/jit/metainterp/executor.py
+++ b/rpython/jit/metainterp/executor.py
@@ -393,19 +393,13 @@
                          rop.SAVE_EXC_CLASS,
                          rop.SAVE_EXCEPTION,
                          rop.RESTORE_EXCEPTION,
-                         rop.VEC_RAW_LOAD_I,
-                         rop.VEC_RAW_LOAD_F,
-                         rop.VEC_RAW_STORE,
-                         rop.VEC_GETARRAYITEM_RAW_I,
-                         rop.VEC_GETARRAYITEM_RAW_F,
-                         rop.VEC_SETARRAYITEM_RAW,
-                         rop.VEC_GETARRAYITEM_GC_I,
-                         rop.VEC_GETARRAYITEM_GC_F,
-                         rop.VEC_SETARRAYITEM_GC,
+                         rop.VEC_LOAD_I,
+                         rop.VEC_LOAD_F,
                          rop.GC_LOAD_I,
                          rop.GC_LOAD_R,
                          rop.GC_LOAD_F,
                          rop.GC_LOAD_INDEXED_R,
+                         rop.VEC_STORE,
                          rop.GC_STORE,
                          rop.GC_STORE_INDEXED,
                          rop.LOAD_FROM_GC_TABLE,
diff --git a/rpython/jit/metainterp/optimizeopt/schedule.py 
b/rpython/jit/metainterp/optimizeopt/schedule.py
--- a/rpython/jit/metainterp/optimizeopt/schedule.py
+++ b/rpython/jit/metainterp/optimizeopt/schedule.py
@@ -1,3 +1,4 @@
+from rpython.jit.backend.llsupport.rewrite import cpu_simplify_scale
 from rpython.jit.metainterp.history import (VECTOR, FLOAT, INT,
         ConstInt, ConstFloat, TargetToken)
 from rpython.jit.metainterp.resoperation import (rop, ResOperation,
@@ -26,7 +27,8 @@
     return fwd
 
 class SchedulerState(object):
-    def __init__(self, graph):
+    def __init__(self, cpu, graph):
+        self.cpu = cpu
         self.renamer = Renamer()
         self.graph = graph
         self.oplist = []
@@ -211,230 +213,16 @@
         import pdb; pdb.set_trace()
     raise NotImplementedError(msg)
 
-class TypeRestrict(object):
-    ANY_TYPE = '\x00'
-    ANY_SIZE = -1
-    ANY_SIGN = -1
-    ANY_COUNT = -1
-    SIGNED = 1
-    UNSIGNED = 0
-
-    def __init__(self,
-                 type=ANY_TYPE,
-                 bytesize=ANY_SIZE,
-                 count=ANY_SIGN,
-                 sign=ANY_COUNT):
-        self.type = type
-        self.bytesize = bytesize
-        self.sign = sign
-        self.count = count
-
-    @always_inline
-    def any_size(self):
-        return self.bytesize == TypeRestrict.ANY_SIZE
-
-    @always_inline
-    def any_count(self):
-        return self.count == TypeRestrict.ANY_COUNT
-
-    def check(self, value):
-        vecinfo = forwarded_vecinfo(value)
-        assert vecinfo.datatype != '\x00'
-        if self.type != TypeRestrict.ANY_TYPE:
-            if self.type != vecinfo.datatype:
-                msg = "type mismatch %s != %s" % \
-                        (self.type, vecinfo.datatype)
-                failnbail_transformation(msg)
-        assert vecinfo.bytesize > 0
-        if not self.any_size():
-            if self.bytesize != vecinfo.bytesize:
-                msg = "bytesize mismatch %s != %s" % \
-                        (self.bytesize, vecinfo.bytesize)
-                failnbail_transformation(msg)
-        assert vecinfo.count > 0
-        if self.count != TypeRestrict.ANY_COUNT:
-            if vecinfo.count < self.count:
-                msg = "count mismatch %s < %s" % \
-                        (self.count, vecinfo.count)
-                failnbail_transformation(msg)
-        if self.sign != TypeRestrict.ANY_SIGN:
-            if bool(self.sign) == vecinfo.sign:
-                msg = "sign mismatch %s < %s" % \
-                        (self.sign, vecinfo.sign)
-                failnbail_transformation(msg)
-
-    def max_input_count(self, count):
-        """ How many """
-        if self.count != TypeRestrict.ANY_COUNT:
-            return self.count
-        return count
-
-class OpRestrict(object):
-    def __init__(self, argument_restris):
-        self.argument_restrictions = argument_restris
-
-    def check_operation(self, state, pack, op):
-        pass
-
-    def crop_vector(self, op, newsize, size):
-        return newsize, size
-
-    def must_crop_vector(self, op, index):
-        restrict = self.argument_restrictions[index]
-        vecinfo = forwarded_vecinfo(op.getarg(index))
-        size = vecinfo.bytesize
-        newsize = self.crop_to_size(op, index)
-        return not restrict.any_size() and newsize != size
-
-    @always_inline
-    def crop_to_size(self, op, index):
-        restrict = self.argument_restrictions[index]
-        return restrict.bytesize
-
-    def opcount_filling_vector_register(self, op, vec_reg_size):
-        """ How many operations of that kind can one execute
-            with a machine instruction of register size X?
-        """
-        if op.is_typecast():
-            if op.casts_down():
-                size = op.cast_input_bytesize(vec_reg_size)
-                return size // op.cast_from_bytesize()
-            else:
-                return vec_reg_size // op.cast_to_bytesize()
-        vecinfo = forwarded_vecinfo(op)
-        return  vec_reg_size // vecinfo.bytesize
-
-class GuardRestrict(OpRestrict):
-    def opcount_filling_vector_register(self, op, vec_reg_size):
-        arg = op.getarg(0)
-        vecinfo = forwarded_vecinfo(arg)
-        return vec_reg_size // vecinfo.bytesize
-
-class LoadRestrict(OpRestrict):
-    def opcount_filling_vector_register(self, op, vec_reg_size):
-        assert rop.is_primitive_load(op.opnum)
-        descr = op.getdescr()
-        return vec_reg_size // descr.get_item_size_in_bytes()
-
-class StoreRestrict(OpRestrict):
-    def __init__(self, argument_restris):
-        self.argument_restrictions = argument_restris
-
-    def must_crop_vector(self, op, index):
-        vecinfo = forwarded_vecinfo(op.getarg(index))
-        bytesize = vecinfo.bytesize
-        return self.crop_to_size(op, index) != bytesize
-
-    @always_inline
-    def crop_to_size(self, op, index):
-        # there is only one parameter that needs to be transformed!
-        descr = op.getdescr()
-        return descr.get_item_size_in_bytes()
-
-    def opcount_filling_vector_register(self, op, vec_reg_size):
-        assert rop.is_primitive_store(op.opnum)
-        descr = op.getdescr()
-        return vec_reg_size // descr.get_item_size_in_bytes()
-
-class OpMatchSizeTypeFirst(OpRestrict):
-    def check_operation(self, state, pack, op):
-        i = 0
-        infos = [forwarded_vecinfo(o) for o in op.getarglist()]
-        arg0 = op.getarg(i)
-        while arg0.is_constant() and i < op.numargs():
-            i += 1
-            arg0 = op.getarg(i)
-        vecinfo = forwarded_vecinfo(arg0)
-        bytesize = vecinfo.bytesize
-        datatype = vecinfo.datatype
-
-        for arg in op.getarglist():
-            if arg.is_constant():
-                continue
-            curvecinfo = forwarded_vecinfo(arg)
-            if curvecinfo.bytesize != bytesize:
-                raise NotAVectorizeableLoop()
-            if curvecinfo.datatype != datatype:
-                raise NotAVectorizeableLoop()
-
-class trans(object):
-
-    TR_ANY = TypeRestrict()
-    TR_ANY_FLOAT = TypeRestrict(FLOAT)
-    TR_ANY_INTEGER = TypeRestrict(INT)
-    TR_FLOAT_2 = TypeRestrict(FLOAT, 4, 2)
-    TR_DOUBLE_2 = TypeRestrict(FLOAT, 8, 2)
-    TR_INT32_2 = TypeRestrict(INT, 4, 2)
-
-    OR_MSTF_I = OpMatchSizeTypeFirst([TR_ANY_INTEGER, TR_ANY_INTEGER])
-    OR_MSTF_F = OpMatchSizeTypeFirst([TR_ANY_FLOAT, TR_ANY_FLOAT])
-    STORE_RESTRICT = StoreRestrict([None, None, TR_ANY])
-    LOAD_RESTRICT = LoadRestrict([])
-    GUARD_RESTRICT = GuardRestrict([TR_ANY_INTEGER])
-
-    # note that the following definition is x86 arch specific
-    MAPPING = {
-        rop.VEC_INT_ADD:            OR_MSTF_I,
-        rop.VEC_INT_SUB:            OR_MSTF_I,
-        rop.VEC_INT_MUL:            OR_MSTF_I,
-        rop.VEC_INT_AND:            OR_MSTF_I,
-        rop.VEC_INT_OR:             OR_MSTF_I,
-        rop.VEC_INT_XOR:            OR_MSTF_I,
-        rop.VEC_INT_EQ:             OR_MSTF_I,
-        rop.VEC_INT_NE:             OR_MSTF_I,
-
-        rop.VEC_FLOAT_ADD:          OR_MSTF_F,
-        rop.VEC_FLOAT_SUB:          OR_MSTF_F,
-        rop.VEC_FLOAT_MUL:          OR_MSTF_F,
-        rop.VEC_FLOAT_TRUEDIV:      OR_MSTF_F,
-        rop.VEC_FLOAT_ABS:          OpRestrict([TR_ANY_FLOAT]),
-        rop.VEC_FLOAT_NEG:          OpRestrict([TR_ANY_FLOAT]),
-
-        rop.VEC_RAW_STORE:          STORE_RESTRICT,
-        rop.VEC_SETARRAYITEM_RAW:   STORE_RESTRICT,
-        rop.VEC_SETARRAYITEM_GC:    STORE_RESTRICT,
-
-        rop.VEC_RAW_LOAD_I:         LOAD_RESTRICT,
-        rop.VEC_RAW_LOAD_F:         LOAD_RESTRICT,
-        rop.VEC_GETARRAYITEM_RAW_I: LOAD_RESTRICT,
-        rop.VEC_GETARRAYITEM_RAW_F: LOAD_RESTRICT,
-        rop.VEC_GETARRAYITEM_GC_I:  LOAD_RESTRICT,
-        rop.VEC_GETARRAYITEM_GC_F:  LOAD_RESTRICT,
-
-        rop.VEC_GUARD_TRUE:             GUARD_RESTRICT,
-        rop.VEC_GUARD_FALSE:            GUARD_RESTRICT,
-
-        ## irregular
-        rop.VEC_INT_SIGNEXT:        OpRestrict([TR_ANY_INTEGER]),
-
-        rop.VEC_CAST_FLOAT_TO_SINGLEFLOAT:  OpRestrict([TR_DOUBLE_2]),
-        # weird but the trace will store single floats in int boxes
-        rop.VEC_CAST_SINGLEFLOAT_TO_FLOAT:  OpRestrict([TR_INT32_2]),
-        rop.VEC_CAST_FLOAT_TO_INT:          OpRestrict([TR_DOUBLE_2]),
-        rop.VEC_CAST_INT_TO_FLOAT:          OpRestrict([TR_INT32_2]),
-
-        rop.VEC_FLOAT_EQ:           OpRestrict([TR_ANY_FLOAT,TR_ANY_FLOAT]),
-        rop.VEC_FLOAT_NE:           OpRestrict([TR_ANY_FLOAT,TR_ANY_FLOAT]),
-        rop.VEC_INT_IS_TRUE:        
OpRestrict([TR_ANY_INTEGER,TR_ANY_INTEGER]),
-    }
-
-    @staticmethod
-    def get(op):
-        res = trans.MAPPING.get(op.vector, None)
-        if not res:
-            failnbail_transformation("could not get OpRestrict for " + str(op))
-        return res
-
 def turn_into_vector(state, pack):
     """ Turn a pack into a vector instruction """
     check_if_pack_supported(state, pack)
     state.costmodel.record_pack_savings(pack, pack.numops())
     left = pack.leftmost()
-    oprestrict = trans.get(left)
+    oprestrict = state.cpu.vector_ext.get_operation_restriction(left)
     if oprestrict is not None:
         oprestrict.check_operation(state, pack, left)
     args = left.getarglist_copy()
-    prepare_arguments(state, pack, args)
+    prepare_arguments(state, oprestrict, pack, args)
     vecop = VecOperation(left.vector, args, left,
                          pack.numops(), left.getdescr())
     for i,node in enumerate(pack.operations):
@@ -449,7 +237,7 @@
     state.oplist.append(vecop)
     assert vecop.count >= 1
 
-def prepare_arguments(state, pack, args):
+def prepare_arguments(state, oprestrict, pack, args):
     # Transforming one argument to a vector box argument
     # The following cases can occur:
     # 1) argument is present in the box_to_vbox map.
@@ -461,7 +249,6 @@
     #    a) expand vars/consts before the label and add as argument
     #    b) expand vars created in the loop body
     #
-    oprestrict = trans.MAPPING.get(pack.leftmost().vector, None)
     if not oprestrict:
         return
     restrictions = oprestrict.argument_restrictions
@@ -684,9 +471,8 @@
 
 class VecScheduleState(SchedulerState):
     def __init__(self, graph, packset, cpu, costmodel):
-        SchedulerState.__init__(self, graph)
+        SchedulerState.__init__(self, cpu, graph)
         self.box_to_vbox = {}
-        self.cpu = cpu
         self.vec_reg_size = cpu.vector_register_size
         self.expanded_map = {}
         self.costmodel = costmodel
diff --git a/rpython/jit/metainterp/optimizeopt/vector.py 
b/rpython/jit/metainterp/optimizeopt/vector.py
--- a/rpython/jit/metainterp/optimizeopt/vector.py
+++ b/rpython/jit/metainterp/optimizeopt/vector.py
@@ -132,7 +132,7 @@
         #
         start = time.clock()
         opt = VectorizingOptimizer(metainterp_sd, jitdriver_sd, 
warmstate.vec_cost)
-        index_vars = opt.run_optimization(info, loop)
+        index_vars = opt.run_optimization(metainterp_sd, info, loop)
         gso = GuardStrengthenOpt(index_vars)
         gso.propagate_all_forward(info, loop, user_code)
         end = time.clock()
@@ -222,7 +222,7 @@
         self.smallest_type_bytes = 0
         self.orig_label_args = None
 
-    def run_optimization(self, info, loop):
+    def run_optimization(self, metainterp_sd, info, loop):
         self.orig_label_args = loop.label.getarglist_copy()
         self.linear_find_smallest_type(loop)
         byte_count = self.smallest_type_bytes
@@ -235,7 +235,7 @@
         # find index guards and move to the earliest position
         graph = self.analyse_index_calculations(loop)
         if graph is not None:
-            state = SchedulerState(graph)
+            state = SchedulerState(metainterp_sd.cpu, graph)
             self.schedule(state) # reorder the trace
 
         # unroll
diff --git a/rpython/jit/metainterp/resoperation.py 
b/rpython/jit/metainterp/resoperation.py
--- a/rpython/jit/metainterp/resoperation.py
+++ b/rpython/jit/metainterp/resoperation.py
@@ -1074,11 +1074,9 @@
 
     '_RAW_LOAD_FIRST',
     'GETARRAYITEM_GC/2d/rfi',
-    'VEC_GETARRAYITEM_GC/2d/fi',
     'GETARRAYITEM_RAW/2d/fi',
-    'VEC_GETARRAYITEM_RAW/2d/fi',
     'RAW_LOAD/2d/fi',
-    'VEC_RAW_LOAD/2d/fi',
+    'VEC_LOAD/2d/fi',
     '_RAW_LOAD_LAST',
 
     'GETINTERIORFIELD_GC/2d/rfi',
@@ -1111,11 +1109,9 @@
     'INCREMENT_DEBUG_COUNTER/1/n',
     '_RAW_STORE_FIRST',
     'SETARRAYITEM_GC/3d/n',
-    'VEC_SETARRAYITEM_GC/3d/n',
     'SETARRAYITEM_RAW/3d/n',
-    'VEC_SETARRAYITEM_RAW/3d/n',
     'RAW_STORE/3d/n',
-    'VEC_RAW_STORE/3d/n',
+    'VEC_STORE/3d/n',
     '_RAW_STORE_LAST',
     'SETINTERIORFIELD_GC/3d/n',
     'SETINTERIORFIELD_RAW/3d/n',    # right now, only used by tests
@@ -1711,19 +1707,19 @@
     rop.PTR_NE: rop.PTR_NE,
 }
 _opvector = {
-    rop.RAW_LOAD_I:         rop.VEC_RAW_LOAD_I,
-    rop.RAW_LOAD_F:         rop.VEC_RAW_LOAD_F,
-    rop.GETARRAYITEM_RAW_I: rop.VEC_GETARRAYITEM_RAW_I,
-    rop.GETARRAYITEM_RAW_F: rop.VEC_GETARRAYITEM_RAW_F,
-    rop.GETARRAYITEM_GC_I: rop.VEC_GETARRAYITEM_GC_I,
-    rop.GETARRAYITEM_GC_F: rop.VEC_GETARRAYITEM_GC_F,
+    rop.RAW_LOAD_I:         rop.VEC_LOAD_I,
+    rop.RAW_LOAD_F:         rop.VEC_LOAD_F,
+    rop.GETARRAYITEM_RAW_I: rop.VEC_LOAD_I,
+    rop.GETARRAYITEM_RAW_F: rop.VEC_LOAD_F,
+    rop.GETARRAYITEM_GC_I: rop.VEC_LOAD_I,
+    rop.GETARRAYITEM_GC_F: rop.VEC_LOAD_F,
     # note that there is no _PURE operation for vector operations.
     # reason: currently we do not care if it is pure or not!
-    rop.GETARRAYITEM_GC_PURE_I: rop.VEC_GETARRAYITEM_GC_I,
-    rop.GETARRAYITEM_GC_PURE_F: rop.VEC_GETARRAYITEM_GC_F,
-    rop.RAW_STORE:        rop.VEC_RAW_STORE,
-    rop.SETARRAYITEM_RAW: rop.VEC_SETARRAYITEM_RAW,
-    rop.SETARRAYITEM_GC: rop.VEC_SETARRAYITEM_GC,
+    rop.GETARRAYITEM_GC_PURE_I: rop.VEC_LOAD_I,
+    rop.GETARRAYITEM_GC_PURE_F: rop.VEC_LOAD_F,
+    rop.RAW_STORE:        rop.VEC_STORE,
+    rop.SETARRAYITEM_RAW: rop.VEC_STORE,
+    rop.SETARRAYITEM_GC: rop.VEC_STORE,
 
     rop.INT_ADD:   rop.VEC_INT_ADD,
     rop.INT_SUB:   rop.VEC_INT_SUB,
diff --git a/rpython/jit/metainterp/test/test_vector.py 
b/rpython/jit/metainterp/test/test_vector.py
--- a/rpython/jit/metainterp/test/test_vector.py
+++ b/rpython/jit/metainterp/test/test_vector.py
@@ -28,7 +28,11 @@
     lltype.free(mem, flavor='raw')
 
 def isclose(a, b, rel_tol=1e-09, abs_tol=0.0):
-    return abs(a-b) <= max(rel_tol * max(abs(a), abs(b)), abs_tol)
+    # NaN only matches NaN; an infinity only matches itself (same sign).
+    if math.isnan(a) or math.isnan(b):
+        return math.isnan(a) and math.isnan(b)
+    return a == b or (not math.isinf(a) and not math.isinf(b) and
+        abs(a-b) <= max(rel_tol * max(abs(a), abs(b)), abs_tol))
 
 class RawStorage(object):
     def __init__(self):
@@ -84,6 +88,8 @@
                               type_system=self.type_system,
                               vec=vec, vec_all=vec_all)
 
+    # FLOAT UNARY
+
     def _vector_float_unary(self, func, type, data):
         func = always_inline(func)
 
@@ -109,10 +115,7 @@
         for i in range(l):
             c = raw_storage_getitem(type,vc,i*size)
             r = func(la[i])
-            assert isclose(r, c) or (math.isnan(r) and math.isnan(c)) or \
-                   (math.isinf(r) and math.isinf(c) and \
-                    (r < 0.0 and c < 0.0) or \
-                    (r > 0.0 and c > 0.0))
+            assert isclose(r, c)
 
         rawstorage.clear()
 
@@ -125,15 +128,14 @@
 
     test_vec_abs_float = \
             vec_float_unary(lambda v: abs(v), rffi.DOUBLE)
+    test_vec_neg_float = \
+            vec_float_unary(lambda v: -v, rffi.DOUBLE)
 
+    # FLOAT BINARY
 
-    @given(data=st.data())
-    @pytest.mark.parametrize('func', [lambda a,b: a+b,
-        lambda a,b: a*b, lambda a,b: a-b])
-    def test_vector_simple_float(self, func, data):
+    def _vector_simple_float(self, func, type, data):
         func = always_inline(func)
 
-        type = rffi.DOUBLE
         size = rffi.sizeof(rffi.DOUBLE)
         myjitdriver = JitDriver(greens = [], reds = 'auto', vectorize=True)
         def f(bytecount, va, vb, vc):
@@ -159,15 +161,29 @@
         self.meta_interp(f, [l*size, va, vb, vc])
 
         for i in range(l):
+            # c: element read back from vc; r: func applied directly in Python
             c = raw_storage_getitem(type,vc,i*size)
             r = func(la[i], lb[i])
-            assert isclose(r, c) or (math.isnan(r) and math.isnan(c)) or \
-                   (math.isinf(r) and math.isinf(c) and \
-                    (r < 0.0 and c < 0.0) or \
-                    (r > 0.0 and c > 0.0))
+            assert isclose(r, c)
 
         rawstorage.clear()
 
+    def _vec_float_binary(test_func, func, type):
+        return pytest.mark.parametrize('func,type', [
+            (func, type)
+        ])(given(data=st.data())(test_func))
+
+    vec_float_binary = functools.partial(_vec_float_binary, 
_vector_simple_float)
+
+    test_vector_float_add = \
+        vec_float_binary(lambda a,b: a+b, rffi.DOUBLE)
+    test_vector_float_sub = \
+        vec_float_binary(lambda a,b: a-b, rffi.DOUBLE)
+    test_vector_float_mul = \
+        vec_float_binary(lambda a,b: a*b, rffi.DOUBLE)
+    #test_vector_float_div = \
+    #    vec_float_binary(lambda a,b: a/b, rffi.DOUBLE)
+
     def _vector_simple_int(self, func, type, data):
         func = always_inline(func)
 
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit

[pypy-commit] pypy ppc-vsx-support: refactoring of vec_load/vec_store, now the same simplification (as e.g. raw_load -> gc_load) is done

Reply via email to