Author: Richard Plangger <r...@pasra.at>
Branch: vecopt2
Changeset: r77124:1f1fd65e76ab
Date: 2015-04-24 18:43 +0200
http://bitbucket.org/pypy/pypy/changeset/1f1fd65e76ab/

Log:    changes to make the rtyper work correctly; SIMD loads and stores are
        now always treated as aligned (not correct, just for testing)

diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -2434,7 +2434,6 @@
     def _vec_load(self, resloc, src_addr, integer, itemsize, aligned):
         if integer:
             if aligned:
-                raise NotImplementedError
                 self.mc.MOVDQA(resloc, src_addr)
             else:
                 self.mc.MOVDQU(resloc, src_addr)
@@ -2461,7 +2460,7 @@
     def _vec_store(self, dest_loc, value_loc, integer, itemsize, aligned):
         if integer:
             if aligned:
-                raise NotImplementedError
+                self.mc.MOVDQA(dest_loc, value_loc)
             else:
                 self.mc.MOVDQU(dest_loc, value_loc)
         else:
@@ -2473,7 +2472,11 @@
     def genop_vec_int_add(self, op, arglocs, resloc):
         loc0, loc1, itemsize_loc = arglocs
         itemsize = itemsize_loc.value
-        if itemsize == 4:
+        if itemsize == 1:
+            self.mc.PADDB(loc0, loc1)
+        elif itemsize == 2:
+            self.mc.PADDW(loc0, loc1)
+        elif itemsize == 4:
             self.mc.PADDD(loc0, loc1)
         elif itemsize == 8:
             self.mc.PADDQ(loc0, loc1)
diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py
--- a/rpython/jit/backend/x86/regalloc.py
+++ b/rpython/jit/backend/x86/regalloc.py
@@ -1466,7 +1466,7 @@
                not descr.is_array_of_structs()
         itemsize, ofs, _ = unpack_arraydescr(descr)
         integer = not descr.is_array_of_floats()
-        aligned = False
+        aligned = True
         args = op.getarglist()
         base_loc = self.rm.make_sure_var_in_reg(op.getarg(0), args)
         ofs_loc = self.rm.make_sure_var_in_reg(op.getarg(1), args)
@@ -1487,7 +1487,7 @@
         ofs_loc = self.rm.make_sure_var_in_reg(op.getarg(1), args)
 
         integer = not descr.is_array_of_floats()
-        aligned = False
+        aligned = True
         self.perform_discard(op, [base_loc, ofs_loc, value_loc,
                                   imm(itemsize), imm(ofs), imm(integer), imm(aligned)])
 
diff --git a/rpython/jit/backend/x86/regloc.py b/rpython/jit/backend/x86/regloc.py
--- a/rpython/jit/backend/x86/regloc.py
+++ b/rpython/jit/backend/x86/regloc.py
@@ -656,6 +656,7 @@
 
     MOVSD = _binaryop('MOVSD')
     MOVAPD = _binaryop('MOVAPD')
+    MOVDQA = _binaryop('MOVDQA')
     MOVDQU = _binaryop('MOVDQU')
     ADDSD = _binaryop('ADDSD')
     ADDPD = _binaryop('ADDPD')
@@ -675,6 +676,8 @@
 
     PADDQ = _binaryop('PADDQ')
     PADDD = _binaryop('PADDD')
+    PADDW = _binaryop('PADDW')
+    PADDB = _binaryop('PADDB')
     PSUBQ = _binaryop('PSUBQ')
     PAND  = _binaryop('PAND')
     POR   = _binaryop('POR')
diff --git a/rpython/jit/backend/x86/test/test_vectorize.py b/rpython/jit/backend/x86/test/test_vectorize.py
--- a/rpython/jit/backend/x86/test/test_vectorize.py
+++ b/rpython/jit/backend/x86/test/test_vectorize.py
@@ -26,14 +26,13 @@
         ptr[1] = rffi.r_int(b)
         ptr[2] = rffi.r_int(c)
         ptr[3] = rffi.r_int(d)
-        return ConstAddressLoc(adr,4)
+        return adr
 
     def test_simple_4_int_load_sum_x86_64(self):
         def callback(asm):
             if asm.mc.WORD != 8:
                 py.test.skip()
-            loc = self.imm_4_int32(123,543,0,0)
-            adr = loc.value
+            adr = self.imm_4_int32(123,543,0,0)
             asm.mc.MOV_ri(r8.value,adr)
             asm.mc.MOVDQU_xm(xmm7.value, (r8.value, 0))
             asm.mc.PADDD_xm(xmm7.value, (r8.value, 0))
@@ -55,8 +54,8 @@
 
     def test_vector_store(self):
         def callback(asm):
-            loc = self.imm_4_int32(11,12,13,14)
-            asm.mov(ImmedLoc(loc.value), ecx)
+            addr = self.imm_4_int32(11,12,13,14)
+            asm.mov(ImmedLoc(addr), ecx)
             asm.mc.MOVDQU_xm(xmm6.value, (ecx.value,0))
             asm.mc.PADDD_xm(xmm6.value, (ecx.value,0))
             asm.mc.MOVDQU(AddressLoc(ecx,ImmedLoc(0)), xmm6)
@@ -65,3 +64,17 @@
 
         res = self.do_test(callback) & 0xffffffff
         assert res == 22
+
+
+    def test_vector_store_aligned(self):
+        def callback(asm):
+            addr = self.imm_4_int32(11,12,13,14)
+            asm.mov(ImmedLoc(addr), ecx)
+            asm.mc.MOVDQA(xmm6, AddressLoc(ecx,ImmedLoc(0)))
+            asm.mc.PADDD_xm(xmm6.value, (ecx.value,0))
+            asm.mc.MOVDQA(AddressLoc(ecx,ImmedLoc(0)), xmm6)
+            asm.mc.MOVDQA(xmm6, AddressLoc(ecx,ImmedLoc(0)))
+            asm.mc.MOVDQ_rx(eax.value, xmm6.value)
+
+        res = self.do_test(callback) & 0xffffffff
+        assert res == 22
diff --git a/rpython/jit/metainterp/optimizeopt/__init__.py b/rpython/jit/metainterp/optimizeopt/__init__.py
--- a/rpython/jit/metainterp/optimizeopt/__init__.py
+++ b/rpython/jit/metainterp/optimizeopt/__init__.py
@@ -68,8 +68,7 @@
                                                           loop.operations)
         optimizations, unroll = build_opt_chain(metainterp_sd, enable_opts)
         if warmstate.vectorize and jitdriver_sd.vectorize:
-            optimize_vector(metainterp_sd, jitdriver_sd, loop,
-                                   optimizations)
+            optimize_vector(metainterp_sd, jitdriver_sd, loop, optimizations)
         elif unroll:
             return optimize_unroll(metainterp_sd, jitdriver_sd, loop,
                                    optimizations, inline_short_preamble,
diff --git a/rpython/jit/metainterp/optimizeopt/dependency.py b/rpython/jit/metainterp/optimizeopt/dependency.py
--- a/rpython/jit/metainterp/optimizeopt/dependency.py
+++ b/rpython/jit/metainterp/optimizeopt/dependency.py
@@ -2,7 +2,8 @@
 
 from rpython.jit.metainterp import compile
 from rpython.jit.metainterp.optimizeopt.util import make_dispatcher_method
-from rpython.jit.metainterp.resoperation import rop
+from rpython.jit.metainterp.resoperation import (rop, GuardResOp)
+from rpython.jit.metainterp.resume import Snapshot
 from rpython.jit.codewriter.effectinfo import EffectInfo
 from rpython.jit.metainterp.history import BoxPtr, ConstPtr, ConstInt, BoxInt, Box, Const
 from rpython.rtyper.lltypesystem import llmemory
@@ -85,30 +86,30 @@
         return self.op.getopname()
 
     def getfailarg_set(self):
-        args = set()
         op = self.getoperation()
+        assert isinstance(op, GuardResOp)
+        args = []
         if op.getfailargs():
             for arg in op.getfailargs():
-                args.add(arg)
+                args.append(arg)
             return args
         elif op.rd_snapshot:
             ss = op.rd_snapshot
-            while ss != None:
+            assert isinstance(ss, Snapshot)
+            while ss:
                 for box in ss.boxes:
-                    args.add(box)
+                    args.append(box)
                 ss = ss.prev
 
         return args
- #set(target_guard.getoperation().getfailargs())
 
 
     def relax_guard_to(self, guard):
         """ Relaxes a guard operation to an earlier guard. """
-        assert self.op.is_guard()
-        assert guard.is_guard()
-
         tgt_op = self.getoperation()
         op = guard
+        assert isinstance(tgt_op, GuardResOp)
+        assert isinstance(op, GuardResOp)
         #descr = compile.ResumeAtLoopHeaderDescr()
         descr = compile.ResumeAtLoopHeaderDescr()
         tgt_op.setdescr(descr)
@@ -357,7 +358,7 @@
         if len(def_chain) == 1:
             return def_chain[0][0]
         else:
-            if argcell == None:
+            if not argcell:
                 return def_chain[-1][0]
             else:
                 assert node is not None
@@ -445,7 +446,7 @@
                 for arg in op.getarglist():
                     tracker.define(arg, node)
                 continue # prevent adding edge to the label itself
-            intformod.inspect_operation(node)
+            intformod.inspect_operation(op,node)
             # definition of a new variable
             if op.result is not None:
                 # In SSA form. Modifications get a new variable
@@ -461,6 +462,7 @@
                 self._build_non_pure_dependencies(node, tracker)
         # pass 2 correct guard dependencies
         for guard_node in self.guards:
+            op = guard_node.getoperation()
             self._build_guard_dependencies(guard_node, op.getopnum(), tracker)
         # pass 3 find schedulable nodes
         jump_node = self.nodes[jump_pos]
@@ -673,14 +675,13 @@
         return False
 
     def get_or_create(self, arg):
-        var = self.index_vars.get(arg)
+        var = self.index_vars.get(arg, None)
         if not var:
             var = self.index_vars[arg] = IndexVar(arg)
         return var
 
     additive_func_source = """
-    def operation_{name}(self, node):
-        op = node.op
+    def operation_{name}(self, op, node):
         box_r = op.result
         if not box_r:
             return
@@ -708,8 +709,7 @@
     del additive_func_source
 
     multiplicative_func_source = """
-    def operation_{name}(self, node):
-        op = node.op
+    def operation_{name}(self, op, node):
         box_r = op.result
         if not box_r:
             return
@@ -741,8 +741,7 @@
     del multiplicative_func_source
 
     array_access_source = """
-    def operation_{name}(self, node):
-        op = node.getoperation()
+    def operation_{name}(self, op, node):
         descr = op.getdescr()
         idx_ref = self.get_or_create(op.getarg(1))
         node.memory_ref = MemoryRef(op, idx_ref, {raw_access})
@@ -753,10 +752,6 @@
     exec py.code.Source(array_access_source
            .format(name='RAW_STORE',raw_access=True)).compile()
     exec py.code.Source(array_access_source
-           .format(name='GETARRAYITEM_GC',raw_access=False)).compile()
-    exec py.code.Source(array_access_source
-           .format(name='SETARRAYITEM_GC',raw_access=False)).compile()
-    exec py.code.Source(array_access_source
            .format(name='GETARRAYITEM_RAW',raw_access=False)).compile()
     exec py.code.Source(array_access_source
            .format(name='SETARRAYITEM_RAW',raw_access=False)).compile()
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_util.py b/rpython/jit/metainterp/optimizeopt/test/test_util.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_util.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_util.py
@@ -314,6 +314,13 @@
     failargs_limit = 1000
     storedebug = None
 
+class FakeWarmState(object):
+    vectorize = True # default is on
+    def __init__(self, enable_opts):
+        self.enable_opts = enable_opts
+
+class FakeJitDriverStaticData(object):
+    vectorize = False
 
 class FakeMetaInterpStaticData(object):
 
@@ -364,9 +371,6 @@
 
 class BaseTest(object):
 
-    class DefaultFakeJitDriverStaticData(object):
-        vectorize = False
-
     def parse(self, s, boxkinds=None, want_fail_descr=True, postprocess=None):
         self.oparse = OpParser(s, self.cpu, self.namespace, 'lltype',
                                boxkinds,
@@ -410,12 +414,12 @@
             metainterp_sd.virtualref_info = self.vrefinfo
         if hasattr(self, 'callinfocollection'):
             metainterp_sd.callinfocollection = self.callinfocollection
-        jitdriver_sd = BaseTest.DefaultFakeJitDriverStaticData()
+        jitdriver_sd = FakeJitDriverStaticData()
         if hasattr(self, 'jitdriver_sd'):
             jitdriver_sd = self.jitdriver_sd
+        warmstate = FakeWarmState(self.enable_opts)
         #
-        return optimize_trace(metainterp_sd, jitdriver_sd, loop,
-                              self.enable_opts,
+        return optimize_trace(metainterp_sd, jitdriver_sd, loop, warmstate,
                               start_state=start_state,
                               export_state=export_state)
 
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_virtualstate.py b/rpython/jit/metainterp/optimizeopt/test/test_virtualstate.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_virtualstate.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_virtualstate.py
@@ -779,6 +779,8 @@
     def _do_optimize_bridge(self, bridge, call_pure_results):
         from rpython.jit.metainterp.optimizeopt import optimize_trace
         from rpython.jit.metainterp.optimizeopt.util import args_dict
+        from rpython.jit.metainterp.optimizeopt.test_util import (FakeWarmState,
+                FakeJitDriverSD)
 
         self.bridge = bridge
         bridge.call_pure_results = args_dict()
@@ -791,9 +793,8 @@
         if hasattr(self, 'callinfocollection'):
             metainterp_sd.callinfocollection = self.callinfocollection
         #
-        class FakeJitDriverSD(object):
-            vectorize = False
-        optimize_trace(metainterp_sd, FakeJitDriverSD(), bridge, self.enable_opts)
+        warmstate = FakeWarmState(self.enable_opts)
+        optimize_trace(metainterp_sd, FakeJitDriverSD(), bridge, warmstate)
 
         
     def optimize_bridge(self, loops, bridge, expected, expected_target='Loop', **boxvalues):
diff --git a/rpython/jit/metainterp/optimizeopt/vectorize.py b/rpython/jit/metainterp/optimizeopt/vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/vectorize.py
@@ -8,7 +8,7 @@
 from rpython.jit.metainterp.optimizeopt.util import make_dispatcher_method
 from rpython.jit.metainterp.optimizeopt.dependency import (DependencyGraph, 
         MemoryRef, Scheduler, SchedulerData, Node)
-from rpython.jit.metainterp.resoperation import (rop, ResOperation)
+from rpython.jit.metainterp.resoperation import (rop, ResOperation, GuardResOp)
 from rpython.jit.metainterp.resume import Snapshot
 from rpython.rlib.debug import debug_print, debug_start, debug_stop
 from rpython.jit.metainterp.jitexc import JitException
@@ -24,7 +24,6 @@
             print arg,
         print
 
-
 def debug_print_operations(loop):
     if not we_are_translated():
         print('--- loop instr numbered ---')
@@ -46,7 +45,7 @@
     opt = VectorizingOptimizer(metainterp_sd, jitdriver_sd, loop, optimizations)
     try:
         opt.propagate_all_forward()
-        debug_print_operations(loop)
+        #debug_print_operations(loop)
         def_opt = Optimizer(metainterp_sd, jitdriver_sd, loop, optimizations)
         def_opt.propagate_all_forward()
     except NotAVectorizeableLoop:
@@ -68,7 +67,7 @@
         self.early_exit = None
         self.future_condition = None
 
-    def propagate_all_forward(self):
+    def propagate_all_forward(self, clear=True):
         self.clear_newoperations()
         label = self.loop.operations[0]
         jump = self.loop.operations[-1]
@@ -173,6 +172,7 @@
                 # to be adjusted. rd_snapshot stores the live variables
                 # that are needed to resume.
                 if copied_op.is_guard():
+                    assert isinstance(copied_op, GuardResOp)
                     snapshot = self.clone_snapshot(copied_op.rd_snapshot, rename_map)
                     copied_op.rd_snapshot = snapshot
                     if not we_are_translated():
@@ -293,7 +293,7 @@
     def follow_def_uses(self, pack):
         assert isinstance(pack, Pair)
         savings = -1
-        candidate = (-1,-1)
+        candidate = (None,None)
         for ldep in pack.left.provides():
             for rdep in pack.right.provides():
                 lnode = ldep.to
@@ -307,6 +307,8 @@
                         candidate = (lnode, rnode)
         #
         if savings >= 0:
+            assert candidate[0] is not None
+            assert candidate[1] is not None
             self.packset.add_pair(*candidate)
 
     def combine_packset(self):
@@ -336,13 +338,12 @@
                 break
 
     def schedule(self):
-        dprint(self.dependency_graph.as_dot())
         self.clear_newoperations()
         scheduler = Scheduler(self.dependency_graph, VecScheduleData())
-        dprint("scheduling loop. scheduleable are: " + str(scheduler.schedulable_nodes))
+        #dprint("scheduling loop. scheduleable are: " + str(scheduler.schedulable_nodes))
         while scheduler.has_more():
             candidate = scheduler.next()
-            dprint("  candidate", candidate, "has pack?", candidate.pack != None, "pack", candidate.pack)
+            #dprint("  candidate", candidate, "has pack?", candidate.pack != None, "pack", candidate.pack)
             if candidate.pack:
                 pack = candidate.pack
                 if scheduler.schedulable(pack.operations):
@@ -439,7 +440,7 @@
         self.box_to_vbox = {}
 
     def as_vector_operation(self, pack):
-        op_count = pack.operations
+        op_count = len(pack.operations)
         assert op_count > 1
         self.pack = pack
         # properties that hold for the pack are:
@@ -447,7 +448,7 @@
         op0 = pack.operations[0].getoperation()
         assert op0.vector != -1
         args = op0.getarglist()[:]
-        args.append(ConstInt(len(op_count)))
+        args.append(ConstInt(op_count))
         vop = ResOperation(op0.vector, args, op0.result, op0.getdescr())
         self._inspect_operation(vop)
         return vop
@@ -518,6 +519,7 @@
     """
     if l_op.getopnum() == r_op.getopnum():
         return True
+    return False
 
 class PackSet(object):
 
@@ -569,8 +571,6 @@
             if not must_unpack_result_to_exec(lpacknode, lnode) and \
                not must_unpack_result_to_exec(rpacknode, rnode):
                 savings += 1
-        if savings >= 0:
-            dprint("estimated " + str(savings) + " for lpack,lnode", lpacknode, lnode)
 
         return savings
 
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to