[pypy-commit] pypy vecopt-merge: slowly approaching the first passing scheduling test, code is smaller and more compact

plan_rich Mon, 14 Sep 2015 04:33:36 -0700

Author: Richard Plangger <[email protected]>
Branch: vecopt-merge
Changeset: r79622:289407699445
Date: 2015-09-14 13:31 +0200
http://bitbucket.org/pypy/pypy/changeset/289407699445/


Log:    slowly approaching the first passing scheduling test, code is
        smaller and more compact

diff --git a/rpython/jit/metainterp/optimizeopt/dependency.py b/rpython/jit/metainterp/optimizeopt/dependency.py
--- a/rpython/jit/metainterp/optimizeopt/dependency.py
+++ b/rpython/jit/metainterp/optimizeopt/dependency.py
@@ -355,7 +355,7 @@
     def __repr__(self):
         pack = ''
         if self.pack:
-            pack = "p: %d" % self.pack.opcount()
+            pack = "p: %d" % self.pack.numops()
         return "Node(%s,%s i: %d)" % (self.op.getopname(), pack, self.opidx)
 
     def __ne__(self, other):
diff --git a/rpython/jit/metainterp/optimizeopt/schedule.py b/rpython/jit/metainterp/optimizeopt/schedule.py
--- a/rpython/jit/metainterp/optimizeopt/schedule.py
+++ b/rpython/jit/metainterp/optimizeopt/schedule.py
@@ -18,7 +18,7 @@
 
     def post_schedule(self):
         loop = self.graph.loop
-        self.renamer.rename(loop.label.getoperation())
+        self.renamer.rename(loop.jump)
 
     def profitable(self):
         return self.costmodel.profitable()
@@ -66,7 +66,7 @@
             Keeps worklist sorted (see priority) """
         op = node.getoperation()
         state.renamer.rename(op)
-        state.unpack_from_vector(op, self)
+        state.unpack_from_vector(op)
         node.position = len(state.oplist)
         worklist = state.worklist
         for dep in node.provides()[:]: # COPY
@@ -97,6 +97,7 @@
         """ Emit all the operations into the oplist parameter.
             Initiates the scheduling. """
         assert isinstance(state, SchedulerState)
+        import pdb; pdb.set_trace()
         while state.has_more():
             node = self.next(state)
             if node:
@@ -177,40 +178,43 @@
                 rop.UINT_LT, rop.UINT_LE,
                 rop.UINT_GT, rop.UINT_GE)
 
-class Type(object):
-    """ The type of one operation. Saves type, size and sign. """
-    @staticmethod
-    def of(op):
-        descr = op.getdescr()
-        if descr:
-            type = INT
-            if descr.is_array_of_floats() or descr.concrete_type == FLOAT:
-                type = FLOAT
-            size = descr.get_item_size_in_bytes()
-            sign = descr.is_item_signed()
-            return Type(type, size, sign)
-        else:
-            size = 8
-            sign = True
-            if op.type == 'f' or op.getopnum() in UNSIGNED_OPS:
-                sign = False
-            return Type(op.type, size, sign)
-
-    def __init__(self, type, size, signed):
-        assert type in (FLOAT, INT)
-        self.type = type
-        self.size = size
-        self.signed = signed
-
-    def clone(self):
-        return Type(self.type, self.size, self.signed)
-
-    def __repr__(self):
-        sign = '-'
-        if not self.signed:
-            sign = '+'
-        return 'Type(%s%s, %d)' % (sign, self.type, self.size)
-
+#class Type(object):
+#    """ The type of one operation. Saves type, size and sign. """
+#    @staticmethod
+#    def of(op):
+#        descr = op.getdescr()
+#        if descr:
+#            type = INT
+#            if descr.is_array_of_floats() or descr.concrete_type == FLOAT:
+#                type = FLOAT
+#            size = descr.get_item_size_in_bytes()
+#            sign = descr.is_item_signed()
+#            return Type(type, size, sign)
+#        else:
+#            size = 8
+#            sign = True
+#            if op.type == 'f' or op.getopnum() in UNSIGNED_OPS:
+#                sign = False
+#            return Type(op.type, size, sign)
+#
+#    def __init__(self, type, size, signed):
+#        assert type in (FLOAT, INT)
+#        self.type = type
+#        self.size = size
+#        self.signed = signed
+#
+#    def bytecount(self):
+#        return self.size
+#
+#    def clone(self):
+#        return Type(self.type, self.size, self.signed)
+#
+#    def __repr__(self):
+#        sign = '-'
+#        if not self.signed:
+#            sign = '+'
+#        return 'Type(%s%s, %d)' % (sign, self.type, self.size)
+#
     #UNKNOWN_TYPE = '-'
 
     #@staticmethod
@@ -268,10 +272,6 @@
     #def getcount(self):
     #    return self.count
 
-    #def pack_byte_size(self, pack):
-    #    if len(pack.operations) == 0:
-    #        return 0
-    #    return self.getsize() * pack.opcount()
 
 class TypeRestrict(object):
     ANY_TYPE = -1
@@ -301,6 +301,20 @@
         self.type = type
         self.count = count
 
+
+    def bytecount(self):
+        return self.count * self.type.bytecount()
+
+class DataTyper(object):
+
+    def infer_type(self, op):
+        # default action, pass through: find the first arg
+        # the output is the same as the first argument!
+        if op.returns_void() or op.argcount() == 0:
+            return
+        arg0 = op.getarg(0)
+        op.setdatatype(arg0.datatype, arg0.bytesize, arg0.signed)
+
 class PassFirstArg(TypeOutput):
     def __init__(self):
         pass
@@ -316,17 +330,14 @@
         op = pack.leftmost()
         args = op.getarglist()
         self.prepare_arguments(state, op.getarglist())
-        #
-        vop = VecOperation(op.vector, args, otype.   op.getdescr())
-        #result = self.transform_result(op)
+        vop = VecOperation(op.vector, args, op, pack.numops(), op.getdescr())
         #
         if op.is_guard():
             assert isinstance(op, GuardResOp)
             assert isinstance(vop, GuardResOp)
             vop.setfailargs(op.getfailargs())
             vop.rd_snapshot = op.rd_snapshot
-        self.vecops.append(vop)
-        self.costmodel.record_pack_savings(self.pack, self.pack.opcount())
+        state.costmodel.record_pack_savings(pack, pack.numops())
         #
         if pack.is_accumulating():
             box = oplist[position].result
@@ -335,8 +346,10 @@
                 op = node.getoperation()
                 assert not op.returns_void()
                 scheduler.renamer.start_renaming(op, box)
+        #
+        state.oplist.append(vop)
 
-    def transform_arguments(self, state, args):
+    def prepare_arguments(self, state, args):
         self.before_argument_transform(args)
         # Transforming one argument to a vector box argument
         # The following cases can occur:
@@ -732,12 +745,12 @@
     def __init__(self):
         OpToVectorOp.__init__(self, (), TypeRestrict())
 
-    def before_argument_transform(self, args):
-        count = min(self.output_type.getcount(), len(self.getoperations()))
-        args.append(ConstInt(count))
+    # OLD def before_argument_transform(self, args):
+        #count = min(self.output_type.getcount(), len(self.getoperations()))
+        #args.append(ConstInt(count))
 
     def get_output_type_given(self, input_type, op):
-        return Type.by_descr(op.getdescr(), self.sched_data.vec_reg_size)
+        return xxx#Type.by_descr(op.getdescr(), self.sched_data.vec_reg_size)
 
     def get_input_type_given(self, output_type, op):
         return None
@@ -760,7 +773,7 @@
         return None
 
     def get_input_type_given(self, output_type, op):
-        return Type.by_descr(op.getdescr(), self.sched_data.vec_reg_size)
+        return xxx#Type.by_descr(op.getdescr(), self.sched_data.vec_reg_size)
 
 class PassThroughOp(OpToVectorOp):
     """ This pass through is only applicable if the target
@@ -778,7 +791,7 @@
 
 
 class trans(object):
-    PASS = PassFirstArg()
+    DT_PASS = DataTyper()
 
     TR_ANY_FLOAT = TypeRestrict(FLOAT)
     TR_ANY_INTEGER = TypeRestrict(INT)
@@ -787,9 +800,9 @@
     TR_LONG = TypeRestrict(INT, 8, 2)
     TR_INT_2 = TypeRestrict(INT, 4, 2)
 
-    INT = OpToVectorOp((TR_ANY_INTEGER, TR_ANY_INTEGER), PASS)
-    FLOAT = OpToVectorOp((TR_ANY_FLOAT, TR_ANY_FLOAT), PASS)
-    FLOAT_UNARY = OpToVectorOp((TR_ANY_FLOAT,), PASS)
+    INT = OpToVectorOp((TR_ANY_INTEGER, TR_ANY_INTEGER), DT_PASS)
+    FLOAT = OpToVectorOp((TR_ANY_FLOAT, TR_ANY_FLOAT), DT_PASS)
+    FLOAT_UNARY = OpToVectorOp((TR_ANY_FLOAT,), DT_PASS)
     LOAD = LoadToVectorLoad()
     STORE = StoreToVectorStore()
     GUARD = PassThroughOp((TR_ANY_INTEGER,))
@@ -839,6 +852,11 @@
         rop.VEC_INT_IS_TRUE: OpToVectorOp((TR_ANY_INTEGER,TR_ANY_INTEGER), None), # TR_ANY_INTEGER),
     }
 
+    # TODO?
+    UNSIGNED_OPS = (rop.UINT_FLOORDIV, rop.UINT_RSHIFT,
+                    rop.UINT_LT, rop.UINT_LE,
+                    rop.UINT_GT, rop.UINT_GE)
+
 def determine_input_output_types(pack, node, forward):
     """ This function is two fold. If moving forward, it
         gets an input type from the packs output type and returns
@@ -888,9 +906,11 @@
 
     def post_schedule(self):
         loop = self.graph.loop
-        self.sched_data.unpack_from_vector(loop.jump.getoperation(), self)
+        self.unpack_from_vector(loop.jump)
         SchedulerState.post_schedule(self)
 
+        self.graph.loop.operations = self.oplist
+
         # add accumulation info to the descriptor
         #for version in self.loop.versions:
         #    # this needs to be done for renamed (accum arguments)
@@ -928,11 +948,11 @@
             to emit the actual operation into the oplist of the scheduler.
         """
         if node.pack:
+            assert node.pack.numops() > 1
             for node in node.pack.operations:
                 scheduler.mark_emitted(node, self)
-                assert node.pack.opcount() > 1
-                op2vecop = determine_trans(node.pack.leftmost())
-                op2vecop.as_vector_operation(self, node.pack)
+            op2vecop = determine_trans(node.pack.leftmost())
+            op2vecop.as_vector_operation(self, node.pack)
             return True
         return False
 
@@ -950,7 +970,7 @@
                         return True
         return False
 
-    def unpack_from_vector(self, op, scheduler):
+    def unpack_from_vector(self, op):
         """ If a box is needed that is currently stored within a vector
             box, this utility creates a unpacking instruction.
         """
@@ -959,7 +979,7 @@
         # unpack for an immediate use
         for i, arg in enumerate(op.getarglist()):
             if not arg.is_constant():
-                argument = self._unpack_from_vector(i, arg, scheduler)
+                argument = self._unpack_from_vector(i, arg)
                 if arg is not argument:
                     op.setarg(i, argument)
         if not op.returns_void():
@@ -969,11 +989,11 @@
             fail_args = op.getfailargs()
             for i, arg in enumerate(fail_args):
                 if arg and not arg.is_constant():
-                    argument = self._unpack_from_vector(i, arg, scheduler)
+                    argument = self._unpack_from_vector(i, arg)
                     if arg is not argument:
                         fail_args[i] = argument
 
-    def _unpack_from_vector(self, i, arg, scheduler):
+    def _unpack_from_vector(self, i, arg):
         if arg in self.seen or arg.type == 'V':
             return arg
         (j, vbox) = self.getvector_of_box(arg)
@@ -982,14 +1002,14 @@
                 return arg
             arg_cloned = arg.clonebox()
             self.seen[arg_cloned] = None
-            scheduler.renamer.start_renaming(arg, arg_cloned)
+            self.renamer.start_renaming(arg, arg_cloned)
             self.setvector_of_box(arg_cloned, j, vbox)
             cj = ConstInt(j)
             ci = ConstInt(1)
             opnum = getunpackopnum(vbox.gettype())
             unpack_op = ResOperation(opnum, [vbox, cj, ci], arg_cloned)
             self.costmodel.record_vector_unpack(vbox, j, 1)
-            scheduler.oplist.append(unpack_op)
+            self.oplist.append(unpack_op)
             return arg_cloned
         return arg
 
@@ -1042,15 +1062,19 @@
     """
     FULL = 0
 
-    def __init__(self, ops, input_type, output_type):
+    def __init__(self, ops):
         self.operations = ops
         self.accum = None
-        self.input_type = input_type
-        self.output_type = output_type
-        assert self.input_type is not None or self.output_type is not None
         self.update_pack_of_nodes()
+        # initializes the type
+        # TODO
+        #input_type, output_type = \
+        #    determine_input_output_types(origin_pack, lnode, forward)
+        #self.input_type = input_type
+        #self.output_type = output_type
+        #assert self.input_type is not None or self.output_type is not None
 
-    def opcount(self):
+    def numops(self):
         return len(self.operations)
 
     def leftmost(self):
@@ -1078,22 +1102,25 @@
         return self._byte_size(self.output_type)
 
     def pack_load(self, vec_reg_size):
-        """ Returns the load of the pack. A value
-            smaller than 0 indicates that it is empty
-            or nearly empty, zero indicates that all slots
-            are used and > 0 indicates that too many operations
-            are in this pack instance.
+        """ Returns the load of the pack a vector register would hold
+            just after executing the operation.
+            returns: < 0 - empty, nearly empty
+                     = 0 - full
+                     > 0 - overloaded
         """
-        if len(self.operations) == 0:
+        left = self.leftmost()
+        if left.returns_void():
+            return 0
+        if self.numops() == 0:
             return -1
         size = maximum_byte_size(self, vec_reg_size)
-        if self.input_type is None:
+        return left.bytesize * self.numops() - size
+        #if self.input_type is None:
             # e.g. load operations
-            return self.output_type.pack_byte_size(self) - size
+        #    return self.output_type.bytecount(self) - size
         # default only consider the input type
         # e.g. store operations, int_add, ...
-        return self.input_type.pack_byte_size(self) - size
-
+        #return self.input_type.bytecount(self) - size
 
     def is_full(self, vec_reg_size):
         """ If one input element times the opcount is equal
@@ -1131,12 +1158,14 @@
             newpack = pack.clone(newoplist)
             load = newpack.pack_load(vec_reg_size)
             if load >= Pack.FULL:
+                pack.update_pack_of_nodes()
                 pack = newpack
                 packlist.append(newpack)
             else:
                 newpack.clear()
                 newpack.operations = []
                 break
+        pack.update_pack_of_nodes()
 
     def slice_operations(self, vec_reg_size):
         count = opcount_filling_vector_register(self, vec_reg_size)
@@ -1163,31 +1192,26 @@
 
     def __repr__(self):
         if len(self.operations) == 0:
-            return "Pack(-, [])"
-        opname = self.operations[0].getoperation().getopname()
-        return "Pack(%s,%r)" % (opname, self.operations)
+            return "Pack(empty)"
+        return "Pack(%dx %s)" % (self.numops(), self.operations[0])
 
     def is_accumulating(self):
         return self.accum is not None
 
     def clone(self, oplist):
-        cloned = Pack(oplist, self.input_type, self.output_type)
+        cloned = Pack(oplist)
         cloned.accum = self.accum
         return cloned
 
 
 class Pair(Pack):
     """ A special Pack object with only two statements. """
-    def __init__(self, left, right, input_type, output_type):
+    def __init__(self, left, right):
         assert isinstance(left, Node)
         assert isinstance(right, Node)
         self.left = left
         self.right = right
-        if input_type:
-            input_type = input_type.clone()
-        if output_type:
-            output_type = output_type.clone()
-        Pack.__init__(self, [left, right], input_type, output_type)
+        Pack.__init__(self, [left, right])
 
     def __eq__(self, other):
         if isinstance(other, Pair):
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_schedule.py b/rpython/jit/metainterp/optimizeopt/test/test_schedule.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_schedule.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_schedule.py
@@ -7,7 +7,7 @@
         Pack, Pair, NotAProfitableLoop, VectorizingOptimizer, X86_CostModel,
         PackSet)
 from rpython.jit.metainterp.optimizeopt.dependency import Node, DependencyGraph
-from rpython.jit.metainterp.optimizeopt.schedule import Type, Scheduler
+from rpython.jit.metainterp.optimizeopt.schedule import Scheduler
 from rpython.jit.metainterp.optimizeopt.test.test_util import LLtypeMixin
 from rpython.jit.metainterp.optimizeopt.test.test_dependency import (DependencyBaseTest,
         FakeDependencyGraph)
@@ -17,13 +17,13 @@
 from rpython.jit.tool.oparser import parse as opparse
 from rpython.jit.tool.oparser_model import get_model
 
-F64 = Type('f',8,False)
-F32 = Type('f',4,False)
-F32_2 =  Type('f',4,False)
-I64 = Type('i',8,True)
-I32 = Type('i',4,True)
-I32_2 =  Type('i',4,True)
-I16 = Type('i',2,True)
+F64 = None #('f',8,False)
+F32 = None #('f',4,False)
+F32_2 =  None #('f',4,False)
+I64 = None #('i',8,True)
+I32 = None #('i',4,True)
+I32_2 =  None #('i',4,True)
+I16 = None #('i',2,True)
 
 class FakePackSet(PackSet):
     def __init__(self, packs):
@@ -68,7 +68,7 @@
         return loop
 
     def pack(self, loop, l, r, input_type, output_type):
-        return Pack(loop.graph.nodes[1+l:1+r], input_type, output_type)
+        return Pack(loop.graph.nodes[1+l:1+r])
 
     def schedule(self, loop, packs, vec_reg_size=16,
                  prepend_invariant=False, overwrite_funcs=None):
@@ -79,7 +79,7 @@
             for i in range(len(pack.operations)-1):
                 o1 = pack.operations[i]
                 o2 = pack.operations[i+1]
-                pair = Pair(o1,o2,pack.input_type,pack.output_type)
+                pair = Pair(o1,o2)
                 pairs.append(pair)
         packset = FakePackSet(pairs)
         state = VecScheduleState(loop.graph, packset, self.cpu, cm)
@@ -94,6 +94,9 @@
             state.prepend_invariant_operations = lambda list, _: list
         opt.combine_packset()
         opt.schedule(state)
+        # works for now. might be the wrong class?
+        # wrap label + operations + jump it in tree loop otherwise
+        return state.graph.loop
 
 class Test(SchedulerBaseTest, LLtypeMixin):
 
@@ -124,7 +127,7 @@
         pack1 = self.pack(loop1, 0, 6, None, F32)
         loop2 = self.schedule(loop1, [pack1])
         loop3 = self.parse_trace("""
-        v10[i32|4] = vec_raw_load_i(p0, i0, 4, descr=float)
+        v10[4xi32] = vec_raw_load_i(p0, i0, descr=float)
         f10 = raw_load_f(p0, i4, descr=float)
         f11 = raw_load_f(p0, i5, descr=float)
         """, False)
@@ -144,7 +147,7 @@
         pack3 = self.pack(loop1, 4, 6, I32_2, F32_2)
         loop2 = self.schedule(loop1, [pack1, pack2, pack3])
         loop3 = self.parse_trace("""
-        v10[i64|2] = vec_raw_load_i(p0, i0, 2, descr=long)
+        v10[i64|2] = vec_raw_load_i(p0, i0, descr=long)
         v20[i32|2] = vec_int_signext(v10[i64|2], 4)
         v30[f64|2] = vec_cast_int_to_float(v20[i32|2])
         """, False)
@@ -268,7 +271,7 @@
         """)
         pack1 = self.pack(loop1, 0, 8, None, F64)
         pack2 = self.pack(loop1, 8, 16, F64, I32_2)
-        I16_2 = Type('i',2,True)
+        I16_2 = None #Type('i',2,True)
         pack3 = self.pack(loop1, 16, 24, I32_2, I16_2)
         pack4 = self.pack(loop1, 24, 32, I16, None)
         def void(b,c):
@@ -278,10 +281,10 @@
                                   '_prevent_signext': void
                               })
         loop3 = self.parse_trace("""
-        v10[f64|2] = vec_raw_load_f(p0, i1, 2, descr=double)
-        v11[f64|2] = vec_raw_load_f(p0, i3, 2, descr=double)
-        v12[f64|2] = vec_raw_load_f(p0, i5, 2, descr=double)
-        v13[f64|2] = vec_raw_load_f(p0, i7, 2, descr=double)
+        v10[f64|2] = vec_raw_load_f(p0, i1, descr=double)
+        v11[f64|2] = vec_raw_load_f(p0, i3, descr=double)
+        v12[f64|2] = vec_raw_load_f(p0, i5, descr=double)
+        v13[f64|2] = vec_raw_load_f(p0, i7, descr=double)
         v14[i32|2] = vec_cast_float_to_int(v10[f64|2])
         v15[i32|2] = vec_cast_float_to_int(v11[f64|2])
         v16[i32|2] = vec_cast_float_to_int(v12[f64|2])
@@ -319,8 +322,8 @@
         pack3 = self.pack(loop1, 8, 12, I32, None)
         loop2 = self.schedule(loop1, [pack1,pack2,pack3])
         loop3 = self.parse_trace("""
-        v44[f64|2] = vec_raw_load_f(p0, i1, 2, descr=double) 
-        v45[f64|2] = vec_raw_load_f(p0, i3, 2, descr=double) 
+        v44[f64|2] = vec_raw_load_f(p0, i1, descr=double) 
+        v45[f64|2] = vec_raw_load_f(p0, i3, descr=double) 
         v46[i32|2] = vec_cast_float_to_singlefloat(v44[f64|2]) 
         v47[i32|2] = vec_cast_float_to_singlefloat(v45[f64|2]) 
         v41[i32|4] = vec_int_pack(v46[i32|2], v47[i32|2], 2, 2) 
@@ -345,7 +348,7 @@
         loop2 = self.schedule(loop1, [pack1,pack2,pack3], prepend_invariant=True)
         loop3 = self.parse_trace("""
         v9[i64|2] = vec_int_expand(255,2)
-        v10[i64|2] = vec_raw_load_i(p0, i1, 2, descr=long)
+        v10[i64|2] = vec_raw_load_i(p0, i1, descr=long)
         v11[i64|2] = vec_int_and(v10[i64|2], v9[i64|2])
         guard_true(v11[i64|2]) []
         """, False)
@@ -365,7 +368,7 @@
         pack2 = self.pack(loop1, 4, 6, I32_2, None)
         loop2 = self.schedule(loop1, [pack1,pack2], prepend_invariant=True)
         loop3 = self.parse_trace("""
-        v1[i32|4] = vec_raw_load_i(p0, i1, 4, descr=float)
+        v1[i32|4] = vec_raw_load_i(p0, i1, descr=float)
         i10 = vec_int_unpack(v1[i32|4], 0, 1)
         raw_store(p0, i3, i10, descr=float)
         i11 = vec_int_unpack(v1[i32|4], 1, 1)
diff --git a/rpython/jit/metainterp/optimizeopt/vector.py b/rpython/jit/metainterp/optimizeopt/vector.py
--- a/rpython/jit/metainterp/optimizeopt/vector.py
+++ b/rpython/jit/metainterp/optimizeopt/vector.py
@@ -22,7 +22,7 @@
 from rpython.jit.metainterp.optimizeopt.version import LoopVersionInfo
 from rpython.jit.metainterp.optimizeopt.schedule import (VecScheduleState,
         Scheduler, Pack, Pair, AccumPair, vectorbox_outof_box, getpackopnum,
-        getunpackopnum, Type, determine_input_output_types)
+        getunpackopnum)
 from rpython.jit.metainterp.optimizeopt.guard import GuardStrengthenOpt
 from rpython.jit.metainterp.resoperation import (rop, ResOperation, GuardResOp, Accum)
 from rpython.rlib import listsort
@@ -453,7 +453,7 @@
         state.prepare()
         scheduler = Scheduler()
         scheduler.walk_and_emit(state)
-        if state.profitable():
+        if not state.profitable():
             return
         state.post_schedule()
 
@@ -674,15 +674,11 @@
                 if origin_pack is None:
                     op = lnode.getoperation()
                     if op.is_primitive_load():
-                        # load outputs value, no input
-                        return Pair(lnode, rnode, None, Type.of(op))
+                        return Pair(lnode, rnode)
                     else:
-                        # store only has an input
-                        return Pair(lnode, rnode, Type.of(op), None)
+                        return Pair(lnode, rnode)
                 if self.profitable_pack(lnode, rnode, origin_pack, forward):
-                    input_type, output_type = \
-                        determine_input_output_types(origin_pack, lnode, forward)
-                    return Pair(lnode, rnode, input_type, output_type)
+                    return Pair(lnode, rnode)
             else:
                 if self.contains_pair(lnode, rnode):
                     return None
@@ -734,17 +730,9 @@
         operations = pack_i.operations
         for op in pack_j.operations[1:]:
             operations.append(op)
-        input_type = pack_i.input_type
-        output_type = pack_i.output_type
-        if input_type:
-            input_type.combine(pack_j.input_type)
-        if output_type:
-            output_type.combine(pack_j.output_type)
-        pack = Pack(operations, input_type, output_type)
+        pack = Pack(operations)
         self.packs[i] = pack
-        # preserve the accum variable (if present) of the
-        # left most pack, that is the pack with the earliest
-        # operation at index 0 in the trace
+        # preserve the accum variable (if present)
         pack.accum = pack_i.accum
         pack_i.accum = pack_j.accum = None
 
@@ -851,6 +839,5 @@
                 pack.clear()
                 self.packs[i] = None
                 continue
-            pack.update_pack_of_nodes()
         self.packs = [pack for pack in self.packs + newpacks if pack]
 
diff --git a/rpython/jit/metainterp/resoperation.py b/rpython/jit/metainterp/resoperation.py
--- a/rpython/jit/metainterp/resoperation.py
+++ b/rpython/jit/metainterp/resoperation.py
@@ -80,21 +80,70 @@
         elif op.is_guard():
             assert not descr.final_descr
         op.setdescr(descr)
+    op.inittype()
     return op
 
-def VecOperation(opnum, args, type, count, descr=None):
+def VecOperation(opnum, args, baseop, count, descr=None):
+    return VecOperationNew(opnum, args, baseop.datatype, baseop.bytesize, baseop.signed, count, descr)
+
+def VecOperationNew(opnum, args, datateyp, bytesize, signed, count, descr=None):
     op = ResOperation(opnum, args, descr)
-    op.item_type = type
-    op.item_count = count
+    op.datatype = datateyp
+    op.bytesize = bytesize
+    op.signed = signed
+    op.count = count
     return op
 
-class AbstractResOpOrInputArg(AbstractValue):
+class Typed(object):
+    _mixin_ = True
+    _attrs_ = ('datatype', 'bytesize', 'signed')
+
+    datatype = '\x00'
+    bytesize = -1
+    signed = True
+
+    def inittype(self):
+        if self.returns_void():
+            self.bytesize = 0
+            self.datatype = 'v'
+            return
+
+        if self.is_primitive_array_access():
+            descr = self.getdescr()
+            type = self.type
+            if descr.is_array_of_floats() or descr.concrete_type == 'f':
+                type = FLOAT
+            self.bytesize = descr.get_item_size_in_bytes()
+            self.sign = descr.is_item_signed()
+            self.datatype = type
+        else:
+            # pass through the type of the first input argument
+            if self.numargs() == 0:
+                return
+            arg0 = self.getarg(0)
+            self.setdatatype(arg0.datatype, arg0.bytesize, arg0.signed)
+        assert self.datatype != '\x00'
+        assert self.bytesize > 0
+
+    def setdatatype(self, data_type, bytesize, signed):
+        self.datatype = data_type
+        self.bytesize = bytesize
+        self.signed = signed
+
+    def typestr(self):
+        sign = '-'
+        if not self.signed:
+            sign = '+'
+        return 'Type(%s%s, %d)' % (sign, self.type, self.size)
+
+class AbstractResOpOrInputArg(AbstractValue, Typed):
     _attrs_ = ('_forwarded',)
     _forwarded = None # either another resop or OptInfo  
 
     def get_forwarded(self):
         return self._forwarded
 
+
 class AbstractResOp(AbstractResOpOrInputArg):
     """The central ResOperation class, representing one operation."""
 
@@ -109,6 +158,7 @@
     boolreflex = -1
     boolinverse = -1
     vector = -1 # -1 means, no vector equivalent, -2 it is a vector statement
+    casts = ('\x00', -1, '\x00', -1)
 
     def getopnum(self):
         return self.opnum
@@ -192,7 +242,11 @@
             except KeyError:
                 num = len(memo)
                 memo[self] = num
-            sres = self.type + str(num) + ' = '
+            if self.is_vector():
+                assert isinstance(self, VectorOp)
+                sres = 'v%d[%dx%s%d] = ' % (num, self.count, self.datatype, self.bytesize * 8)
+            else:
+                sres = self.type + str(num) + ' = '
         #if self.result is not None:
         #    sres = '%s = ' % (self.result,)
         else:
@@ -219,6 +273,10 @@
         except KeyError:
             num = len(memo)
             memo[self] = num
+        if self.is_vector():
+            assert isinstance(self, VectorOp)
+            return 'v%d[%dx%s%d]' % (num, self.count, self.datatype,
+                                     self.bytesize * 8)
         return self.type + str(num)
 
     def __repr__(self):
@@ -363,6 +421,9 @@
     def is_label(self):
         return self.getopnum() == rop.LABEL
 
+    def is_vector(self):
+        return self.vector == -2
+
     def returns_void(self):
         return self.type == 'v'
 
@@ -376,28 +437,6 @@
 class PlainResOp(AbstractResOp):
     pass
 
-class CastResOp(AbstractResOp):
-    _attrs_ = ('casts')
-    casts = ('\x00', -1, '\x00', -1)
-
-    def casts_box(self):
-        return True
-
-    def cast_to(self):
-        _, _, to_type, size = self.casts
-        if self.casts[3] == 0:
-            if self.getopnum() == rop.INT_SIGNEXT:
-                from rpython.jit.metainterp.history import ConstInt
-                arg = self.getarg(1)
-                assert isinstance(arg, ConstInt)
-                return (to_type,arg.value)
-            else:
-                raise NotImplementedError
-        return (to_type,size)
-
-    def cast_from(self):
-        return ('\x00',-1)
-
 class ResOpWithDescr(AbstractResOp):
 
     _descr = None
@@ -556,68 +595,56 @@
     def accumulates_value(self):
         return True
 
+class CastOp(object):
+    _mixin_ = True
+
+    def casts_box(self):
+        return True
+
+    def cast_to(self):
+        _, _, to_type, size = self.casts
+        if self.casts[3] == 0:
+            if self.getopnum() == rop.INT_SIGNEXT:
+                from rpython.jit.metainterp.history import ConstInt
+                arg = self.getarg(1)
+                assert isinstance(arg, ConstInt)
+                return (to_type,arg.value)
+            else:
+                raise NotImplementedError
+        return (to_type,size)
+
+    def cast_from(self):
+        return ('\x00',-1)
+
 class VectorOp(object):
     _mixin_ = True
-    #_attrs_ = ('item_type','item_count','item_size','item_signed','accum')
-    _attrs_ = ('item_type', 'item_count')
-
-    #def __init__(self, item_type=FLOAT, item_count=2, item_size=8, item_signed=False, accum=None):
-    #    assert item_type in (FLOAT, INT)
-    #    self.item_type = item_type
-    #    self.item_count = item_count
-    #    self.item_size = item_size
-    #    self.item_signed = item_signed
-    #    self.accum = None
-
-    def gettype(self):
-        return self.type
-
-    def getbytes(self):
-        return self.slot_bytes
-
-    def getcount(self):
-        return self.item_count
-
-    def fully_packed(self, vec_reg_size):
-        return self.item_size * self.item_count == vec_reg_size
-
-    def forget_value(self):
-        raise NotImplementedError("cannot forget value of vector")
-
-    def clonebox(self):
-        return BoxVector(self.item_type, self.item_count, self.item_size, self.item_signed)
-
-    def constbox(self):
-        raise NotImplementedError("not possible to have a constant vector box")
-
-    def nonnull(self):
-        raise NotImplementedError("no value known, nonnull is unkown")
+    _attrs_ = ('count',)
 
     def repr_rpython(self):
         return repr_rpython(self, 'bv')
 
     def same_shape(self, other):
-        if not isinstance(other, BoxVector):
+        """ NOT_RPYTHON """
+        if not other.is_vector():
             return False
         #
-        if other.item_size == -1 or self.item_size == -1:
+        # TODO ? if other.item_size == -1 or self.item_size == -1:
             # fallback for tests that do not specify the size
-            return True
+        #    return True
         #
-        if self.item_type != other.item_type:
+        if self.datatype != other.datatype:
             return False
-        if self.item_size != other.item_size:
+        if self.bytesize != other.bytesize:
             return False
-        if self.item_count != other.item_count:
+        if self.signed!= other.signed:
             return False
-        if self.item_signed != other.item_signed:
+        if self.count != other.count:
             return False
         return True
 
     def getaccum(self):
         return self.accum
 
-
 class AbstractInputArg(AbstractResOpOrInputArg):
     def set_forwarded(self, forwarded_to):
         self._forwarded = forwarded_to
@@ -642,6 +669,9 @@
     def is_inputarg(self):
         return True
 
+    def initinputtype(self, cpu):
+        pass
+
 class InputArgInt(IntOp, AbstractInputArg):
     def __init__(self, intval=0):
         self.setint(intval)
@@ -974,11 +1004,11 @@
 
     '_RAW_LOAD_FIRST',
     'GETARRAYITEM_GC/2d/rfi',
-    'VEC_GETARRAYITEM_GC/3d/fi',
+    'VEC_GETARRAYITEM_GC/2d/fi',
     'GETARRAYITEM_RAW/2d/fi',
-    'VEC_GETARRAYITEM_RAW/3d/fi',
+    'VEC_GETARRAYITEM_RAW/2d/fi',
     'RAW_LOAD/2d/fi',
-    'VEC_RAW_LOAD/3d/fi',
+    'VEC_RAW_LOAD/2d/fi',
     '_RAW_LOAD_LAST',
 
     'GETINTERIORFIELD_GC/2d/rfi',
@@ -1059,19 +1089,15 @@
     '_LAST',     # for the backend to add more internal operations
 ]
 
-FLOAT = 'f'
-INT = 'i'
 _cast_ops = {
-    'INT_SIGNEXT': (INT, 0, INT, 0),
-    'CAST_FLOAT_TO_INT': (FLOAT, 8, INT, 4),
-    'CAST_INT_TO_FLOAT': (INT, 4, FLOAT, 8),
-    'CAST_FLOAT_TO_SINGLEFLOAT': (FLOAT, 8, FLOAT, 4),
-    'CAST_SINGLEFLOAT_TO_FLOAT': (FLOAT, 4, FLOAT, 8),
-    'CAST_PTR_TO_INT': (INT, 0, INT, 4),
-    'CAST_INT_TO_PTR': (INT, 4, INT, 0),
+    'INT_SIGNEXT': ('i', 0, 'i', 0),
+    'CAST_FLOAT_TO_INT': ('f', 8, 'i', 4),
+    'CAST_INT_TO_FLOAT': ('i', 4, 'f', 8),
+    'CAST_FLOAT_TO_SINGLEFLOAT': ('f', 8, 'f', 4),
+    'CAST_SINGLEFLOAT_TO_FLOAT': ('f', 4, 'f', 8),
+    'CAST_PTR_TO_INT': ('r', 0, 'i', 4),
+    'CAST_INT_TO_PTR': ('i', 4, 'r', 0),
 }
-del FLOAT
-del INT
 
 # ____________________________________________________________
 
@@ -1156,8 +1182,6 @@
     if is_guard:
         assert withdescr
         baseclass = GuardResOp
-    elif name in _cast_ops:
-        baseclass = CastResOp
     elif withdescr:
         baseclass = ResOpWithDescr
     else:
@@ -1171,6 +1195,8 @@
         mixins.append(RefOp)
     else:
         assert result_type == 'n'
+    if name in _cast_ops:
+        mixins.append(CastOp)
     if name.startswith('VEC'):
         mixins.insert(1,VectorOp)
 
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit

[pypy-commit] pypy vecopt-merge: slowly approaching the first passing scheduling test, code is smaller and more compact

Reply via email to