Author: Richard Plangger <[email protected]>
Branch: vecopt-merge
Changeset: r79651:5b32b72ad145
Date: 2015-09-16 14:02 +0200
http://bitbucket.org/pypy/pypy/changeset/5b32b72ad145/

Log:    scheduling tests passing again

diff --git a/rpython/jit/metainterp/optimizeopt/schedule.py b/rpython/jit/metainterp/optimizeopt/schedule.py
--- a/rpython/jit/metainterp/optimizeopt/schedule.py
+++ b/rpython/jit/metainterp/optimizeopt/schedule.py
@@ -70,13 +70,14 @@
             return True
         return node.depends_count() != 0
 
-    def mark_emitted(self, node, state):
+    def mark_emitted(self, node, state, unpack=True):
         """ An operation has been emitted, adds new operations to the worklist
             whenever their dependency count drops to zero.
             Keeps worklist sorted (see priority) """
         op = node.getoperation()
         state.renamer.rename(op)
-        state.ensure_args_unpacked(op)
+        if unpack:
+            state.ensure_args_unpacked(op)
         node.position = len(state.oplist)
         worklist = state.worklist
         for dep in node.provides()[:]: # COPY
@@ -322,7 +323,7 @@
                     rop.UINT_LT, rop.UINT_LE,
                     rop.UINT_GT, rop.UINT_GE)
 
-def turn_to_vector(state, pack):
+def turn_into_vector(state, pack):
     """ Turn a pack into a vector instruction """
     #
     # TODO self.check_if_pack_supported(pack)
@@ -546,7 +547,7 @@
         i += 1
     else:
         # note that heterogenous nodes are not yet tracked
-        vecop = expanded_map.get(arg, None)
+        vecop = state.find_expanded([arg])
         if vecop:
             args[index] = vecop
             return vecop
@@ -554,12 +555,20 @@
         ops.append(vecop)
         if variables is not None:
             variables.append(vecop)
-        expanded_map[arg] = vecop
+        state.expand([arg], vecop)
+        #expanded_map.setdefault(arg,[]).append((vecop, -1))
         #for i in range(vecop.count):
         #    state.setvector_of_box(arg, i, vecop)
         args[index] = vecop
         return vecop
 
+    # quick search if it has already been expanded
+    expandargs = [op.getoperation().getarg(index) for op in pack.operations]
+    vecop = state.find_expanded(expandargs)
+    if vecop:
+        args[index] = vecop
+        return vecop
+
     vecop = OpHelpers.create_vec(arg.type, left.bytesize, left.signed)
     ops.append(vecop)
     for i,node in enumerate(pack.operations):
@@ -568,8 +577,8 @@
         arguments = [vecop, arg, ConstInt(i), ConstInt(1)]
         vecop = OpHelpers.create_vec_pack(arg.type, arguments, left.bytesize,
                                           left.signed, vecop.count+1)
-        #state.setvector_of_box(arg, i, vecop)
         ops.append(vecop)
+    state.expand(expandargs, vecop)
 
     if variables is not None:
         variables.append(vecop)
@@ -589,6 +598,44 @@
             self.inputargs[arg] = None
         self.seen = {}
 
+    def expand(self, args, vecop):
+        index = 0
+        if len(args) == 1:
+            # loop is executed once, thus sets -1 as index
+            index = -1
+        for arg in args:
+            self.expanded_map.setdefault(arg, []).append((vecop, index))
+            index += 1
+
+    def find_expanded(self, args):
+        if len(args) == 1:
+            candidates = self.expanded_map.get(args[0], [])
+            for (vecop, index) in candidates:
+                if index == -1:
+                    # found an expanded variable/constant
+                    return vecop
+            return None
+        possible = {}
+        for i, arg in enumerate(args):
+            expansions = self.expanded_map.get(arg, [])
+            candidates = [vecop for (vecop, index) in expansions \
+                          if i == index and possible.get(vecop,True)]
+            for vecop in candidates:
+                for key in possible.keys():
+                    if key not in candidates:
+                        # delete every not possible key,value
+                        possible[key] = False
+                # found a candidate, append it if not yet present
+                possible[vecop] = True
+
+            if not possible:
+                # no possibility left, this combination is not expanded
+                return None
+        for vecop,valid in possible.items():
+            if valid:
+                return vecop
+        return None
+
     def post_schedule(self):
         loop = self.graph.loop
         self.ensure_args_unpacked(loop.jump)
@@ -633,8 +680,8 @@
         if node.pack:
             assert node.pack.numops() > 1
             for node in node.pack.operations:
-                scheduler.mark_emitted(node, self)
-            turn_to_vector(self, node.pack)
+                scheduler.mark_emitted(node, self, unpack=False)
+            turn_into_vector(self, node.pack)
             return True
         return False
 
@@ -673,7 +720,7 @@
                         fail_arguments[i] = arg
 
     def ensure_unpacked(self, index, arg):
-        if arg in self.seen or not arg.is_vector():
+        if arg in self.seen or arg.is_vector():
             return arg
         (pos, var) = self.getvector_of_box(arg)
         if var:
@@ -722,7 +769,8 @@
 
     if op.is_typecast():
         if op.casts_down():
-            return vec_reg_size // op.cast_from_bytesize()
+            size = op.cast_input_bytesize(vec_reg_size)
+            return size // op.cast_from_bytesize()
         else:
             return vec_reg_size // op.cast_to_bytesize()
     return  vec_reg_size // op.bytesize
@@ -791,10 +839,10 @@
             if left.casts_down():
                 # size is reduced
                 size = left.cast_input_bytesize(vec_reg_size)
-                import pdb; pdb.set_trace()
                 return left.cast_from_bytesize() * self.numops() - size
             else:
                 # size is increased
+                #size = left.cast_input_bytesize(vec_reg_size)
                 return left.cast_to_bytesize() * self.numops() - vec_reg_size
         return left.bytesize * self.numops() - vec_reg_size
 
@@ -823,10 +871,13 @@
             In this step the pack is reduced in size to fit into an
             vector register.
         """
+        before_count = len(packlist)
+        print "splitting pack", self
         pack = self
         while pack.pack_load(vec_reg_size) > Pack.FULL:
             pack.clear()
             oplist, newoplist = pack.slice_operations(vec_reg_size)
+            print "  split of %dx, left: %d" % (len(oplist), len(newoplist))
             pack.operations = oplist
             pack.update_pack_of_nodes()
             if not pack.leftmost().is_typecast():
@@ -842,6 +893,7 @@
                 newpack.clear()
                 newpack.operations = []
                 break
+        print "  => %dx packs out of %d operations" % (-before_count + len(packlist) + 1, sum([pack.numops() for pack in packlist[before_count:]]))
         pack.update_pack_of_nodes()
 
     def slice_operations(self, vec_reg_size):
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_schedule.py b/rpython/jit/metainterp/optimizeopt/test/test_schedule.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_schedule.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_schedule.py
@@ -22,6 +22,11 @@
         self.packs = packs
         self.vec_reg_size = 16
 
+class FakeVecScheduleState(VecScheduleState):
+    def __init__(self):
+        self.expanded_map = {}
+
+
 class SchedulerBaseTest(DependencyBaseTest):
 
     def setup_class(self):
@@ -294,13 +299,11 @@
         v15[2xi32] = vec_cast_float_to_int(v11[2xf64])
         v16[2xi32] = vec_cast_float_to_int(v12[2xf64])
         v17[2xi32] = vec_cast_float_to_int(v13[2xf64])
-        v18[2xi16] = vec_int_signext(v14[2xi32],2)
-        v19[2xi16] = vec_int_signext(v15[2xi32],2)
-        v20[2xi16] = vec_int_signext(v16[2xi32],2)
-        v21[2xi16] = vec_int_signext(v17[2xi32],2)
-        v22[4xi16] = vec_pack_i(v18[2xi16], v19[2xi16], 2, 2)
-        v23[6xi16] = vec_pack_i(v22[4xi16], v20[2xi16], 4, 2)
-        v24[8xi16] = vec_pack_i(v23[6xi16], v21[2xi16], 6, 2)
+        v22[4xi32] = vec_pack_i(v14[2xi32], v15[2xi32], 2, 2)
+        v18[4xi16] = vec_int_signext(v22[4xi32],2)
+        v23[6xi16] = vec_pack_i(v16[2xi32], v17[2xi32], 2, 2)
+        v20[4xi16] = vec_int_signext(v23[4xi32],2)
+        v24[8xi16] = vec_pack_i(v18[4xi16], v20[4xi16], 4, 4)
         vec_raw_store(p1, i1, v24[8xi16], descr=short)
         """, False)
         self.assert_equal(loop2, loop3)
@@ -463,3 +466,19 @@
         packset.split_overloaded_packs()
         assert len(packset.packs) == 1
 
+    def test_expand(self):
+        state = FakeVecScheduleState()
+        assert state.find_expanded([]) == None
+        state.expand(['a'], 'a')
+        assert state.find_expanded(['a']) == 'a'
+        state.expand(['a','b','c'], 'abc')
+        assert state.find_expanded(['a','b','c']) == 'abc'
+        state.expand(['a','d','c'], 'adc')
+        assert state.find_expanded(['a','b','c']) == 'abc'
+        assert state.find_expanded(['a','d','c']) == 'adc'
+        assert state.find_expanded(['d','d','c']) == None
+        state.expand(['d','d','c'], 'ddc')
+        assert state.find_expanded(['d','d','c']) == 'ddc'
+
+
+
diff --git a/rpython/jit/metainterp/optimizeopt/vector.py b/rpython/jit/metainterp/optimizeopt/vector.py
--- a/rpython/jit/metainterp/optimizeopt/vector.py
+++ b/rpython/jit/metainterp/optimizeopt/vector.py
@@ -830,17 +830,19 @@
 
     def split_overloaded_packs(self):
         newpacks = []
-        import pdb; pdb.set_trace()
         for i,pack in enumerate(self.packs):
             load = pack.pack_load(self.vec_reg_size)
             if load > Pack.FULL:
+                print "overloaded pack", pack
                 pack.split(newpacks, self.vec_reg_size)
                 continue
             if load < Pack.FULL:
+                print "underloaded pack", pack
                 for op in pack.operations:
                     op.priority = -100
                 pack.clear()
                 self.packs[i] = None
                 continue
+            print "fully packed", pack
         self.packs = [pack for pack in self.packs + newpacks if pack]
 
diff --git a/rpython/jit/metainterp/resoperation.py b/rpython/jit/metainterp/resoperation.py
--- a/rpython/jit/metainterp/resoperation.py
+++ b/rpython/jit/metainterp/resoperation.py
@@ -456,7 +456,7 @@
     def is_typecast(self):
         return False
 
-    def cast_count(self):
+    def cast_count(self, vec_reg_size):
         return self.casts[4]
 
     def cast_types(self):
@@ -667,7 +667,7 @@
 
     def cast_input_bytesize(self, vec_reg_size):
         count = vec_reg_size // self.cast_to_bytesize()
-        size = self.cast_from_bytesize() * self.count
+        size = self.cast_from_bytesize() * self.cast_count(vec_reg_size)
         return size
 
 class SignExtOp(object):
@@ -689,8 +689,8 @@
         arg = self.getarg(0)
         return arg.bytesize
 
-    def cast_count(self):
-        return self.casts[4]
+    def cast_input_bytesize(self, vec_reg_size):
+        return vec_reg_size # self.cast_from_bytesize() * self.cast_count(vec_reg_size)
 
 
 class VectorOp(object):
@@ -1170,11 +1170,11 @@
 ]
 
 _cast_ops = {
-    'CAST_FLOAT_TO_INT': ('f', 8, 'i', 4),
-    'CAST_INT_TO_FLOAT': ('i', 4, 'f', 8),
-    'CAST_FLOAT_TO_SINGLEFLOAT': ('f', 8, 'f', 4),
-    'CAST_SINGLEFLOAT_TO_FLOAT': ('f', 4, 'f', 8),
-    'INT_SIGNEXT': ('i', 0, 'i', 0),
+    'CAST_FLOAT_TO_INT': ('f', 8, 'i', 4, 2),
+    'CAST_INT_TO_FLOAT': ('i', 4, 'f', 8, 2),
+    'CAST_FLOAT_TO_SINGLEFLOAT': ('f', 8, 'f', 4, 2),
+    'CAST_SINGLEFLOAT_TO_FLOAT': ('f', 4, 'f', 8, 2),
+    'INT_SIGNEXT': ('i', 0, 'i', 0, 0),
     #'CAST_PTR_TO_INT': ('r', 0, 'i', 4),
     #'CAST_INT_TO_PTR': ('i', 4, 'r', 0),
 }
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to