Author: Armin Rigo <[email protected]>
Branch: guard-compatible
Changeset: r83066:0cbabc844652
Date: 2016-03-15 10:08 +0100
http://bitbucket.org/pypy/pypy/changeset/0cbabc844652/

Log:    Add more flexibility with (compilation-time) tweakable parameters

diff --git a/rpython/jit/backend/test/runner_test.py b/rpython/jit/backend/test/runner_test.py
--- a/rpython/jit/backend/test/runner_test.py
+++ b/rpython/jit/backend/test/runner_test.py
@@ -217,7 +217,7 @@
 
         self.cpu.grow_guard_compatible_switch(looptoken.compiled_loop_token,
                                               faildescr1, t2_box._resref)
-        for retry in range(2):
+        for retry in range(5):
             deadframe = self.cpu.execute_token(looptoken,
                                                t2_box._resref)
             fail = self.cpu.get_latest_descr(deadframe)
diff --git a/rpython/jit/backend/x86/guard_compat.py b/rpython/jit/backend/x86/guard_compat.py
--- a/rpython/jit/backend/x86/guard_compat.py
+++ b/rpython/jit/backend/x86/guard_compat.py
@@ -15,6 +15,22 @@
 # the guard, ending in -1.
 
 
+# --tweakable parameters (you get the effect closest to before we had
+# guard-compat by setting GROW_POSITION to 1 and UPDATE_ASM to 0)--
+
+# where grow_switch puts the new value:
+#   0 = at the beginning of the list
+#   1 = at position N-1, just before the initial value which stays last
+#   2 = at the end
+GROW_POSITION = 2
+
+# when guard_compatible's slow path is called and finds a value, when
+# should we update the machine code to make this value the fast-path?
+#   0 = never
+#   another value = after about this many calls to the slow-path
+UPDATE_ASM = 1291
+
+
 def generate_guard_compatible(assembler, guard_token, loc_reg, initial_value):
     # fast-path check
     mc = assembler.mc
@@ -49,9 +65,10 @@
     mc.stack_frame_size_delta(-WORD)
 
     small_ofs = rel_pos_compatible_imm - mc.get_relative_pos()
-    compatinfo[0] = small_ofs
+    assert -128 <= small_ofs < 128
+    compatinfo[0] = small_ofs & 0xFF
 
-    assembler.guard_success_cc = rx86.Conditions['NZ']
+    assembler.guard_success_cc = rx86.Conditions['Z']
     assembler.implement_guard(guard_token)
     #
     # patch the JE above
@@ -99,10 +116,22 @@
 
     newcompatinfo = rffi.cast(rffi.SIGNEDP, newcompatinfoaddr)
     newcompatinfo[0] = compatinfo[0]
-    newcompatinfo[1] = new_value
 
-    for i in range(1, length):
-        newcompatinfo[i + 1] = compatinfo[i]
+    if GROW_POSITION == 0:
+        newcompatinfo[1] = new_value
+        for i in range(1, length):
+            newcompatinfo[i + 1] = compatinfo[i]
+    elif GROW_POSITION == 1:
+        for i in range(1, length - 2):
+            newcompatinfo[i] = compatinfo[i]
+        newcompatinfo[length - 2] = new_value
+        newcompatinfo[length - 1] = compatinfo[length - 2]
+        newcompatinfo[length] = -1    # == compatinfo[length - 1]
+    else:
+        for i in range(1, length - 1):
+            newcompatinfo[i] = compatinfo[i]
+        newcompatinfo[length - 1] = new_value
+        newcompatinfo[length] = -1    # == compatinfo[length - 1]
 
     # the old 'compatinfo' is not used any more, but will only be freed
     # when the looptoken is freed
@@ -117,6 +146,36 @@
     assembler._guard_compat_checkers = [0] * nb_registers
 
 
+def _build_inner_loop(mc, regnum, tmp, immediate_return):
+    pos = mc.get_relative_pos()
+    mc.CMP_mr((tmp, WORD), regnum)
+    mc.J_il8(rx86.Conditions['E'], 0)    # patched below
+    je_location = mc.get_relative_pos()
+    mc.CMP_mi((tmp, WORD), -1)
+    mc.LEA_rm(tmp, (tmp, WORD))
+    mc.J_il8(rx86.Conditions['NE'], pos - (mc.get_relative_pos() + 2))
+    #
+    # not found!  Return the condition code 'Not Zero' to mean 'not found'.
+    mc.OR_rr(tmp, tmp)
+    #
+    # if 'immediate_return', patch the JE above to jump here.  When we
+    # follow that path, we get condition code 'Zero', which means 'found'.
+    if immediate_return:
+        offset = mc.get_relative_pos() - je_location
+        assert 0 < offset <= 127
+        mc.overwrite(je_location-1, chr(offset))
+    #
+    if IS_X86_32:
+        mc.POP_r(tmp)
+    mc.RET16_i(WORD)
+    mc.force_frame_size(8)   # one word on X86_64, two words on X86_32
+    #
+    # if not 'immediate_return', patch the JE above to jump here.
+    if not immediate_return:
+        offset = mc.get_relative_pos() - je_location
+        assert 0 < offset <= 127
+        mc.overwrite(je_location-1, chr(offset))
+
 def get_or_build_checker(assembler, regnum):
     """Returns a piece of assembler that checks if the value is in
     some array (there is one such piece per input register 'regnum')
@@ -142,40 +201,43 @@
 
     mc.MOV_rs(tmp, stack_arg)
 
-    pos = mc.get_relative_pos()
-    mc.CMP_mr((tmp, WORD), regnum)
-    mc.J_il8(rx86.Conditions['E'], 0)    # patched below
-    je_location = mc.get_relative_pos()
-    mc.CMP_mi((tmp, WORD), -1)
-    mc.LEA_rm(tmp, (tmp, WORD))
-    mc.J_il8(rx86.Conditions['NE'], pos - (mc.get_relative_pos() + 2))
+    if UPDATE_ASM > 0:
+        CONST_TO_ADD = int((1 << 24) / (UPDATE_ASM + 0.3))
+        if CONST_TO_ADD >= (1 << 23):
+            CONST_TO_ADD = (1 << 23) - 1
+        if CONST_TO_ADD < 1:
+            CONST_TO_ADD = 1
+        CONST_TO_ADD <<= 8
+        #
+        mc.ADD32_mi32((tmp, 0), CONST_TO_ADD)
+        mc.J_il8(rx86.Conditions['C'], 0)    # patched below
+        jc_location = mc.get_relative_pos()
+    else:
+        jc_location = -1
 
-    # not found!  The condition code is already 'Zero', which we return
-    # to mean 'not found'.
-    if IS_X86_32:
-        mc.POP_r(tmp)
-    mc.RET16_i(WORD)
+    _build_inner_loop(mc, regnum, tmp, immediate_return=True)
 
-    mc.force_frame_size(8)   # one word on X86_64, two words on X86_32
-
-    # patch the JE above
-    offset = mc.get_relative_pos() - je_location
-    assert 0 < offset <= 127
-    mc.overwrite(je_location-1, chr(offset))
-
-    # found!  update the assembler by writing the value at 'small_ofs'
-    # bytes before our return address.  This should overwrite the const in
-    # 'MOV_ri64(r11, const)', first instruction of the guard_compatible.
-    mc.MOV_rs(tmp, stack_arg)
-    mc.MOV_rm(tmp, (tmp, 0))
-    mc.ADD_rs(tmp, stack_ret)
-    mc.MOV_mr((tmp, -WORD), regnum)
-
-    # the condition codes say 'Not Zero', as a result of the ADD above.
-    # Return this condition code to mean 'found'.
-    if IS_X86_32:
-        mc.POP_r(tmp)
-    mc.RET16_i(WORD)
+    if jc_location != -1:
+        # patch the JC above
+        offset = mc.get_relative_pos() - jc_location
+        assert 0 < offset <= 127
+        mc.overwrite(jc_location-1, chr(offset))
+        #
+        _build_inner_loop(mc, regnum, tmp, immediate_return=False)
+        #
+        # found!  update the assembler by writing the value at 'small_ofs'
+        # bytes before our return address.  This should overwrite the const in
+        # 'MOV_ri64(r11, const)', first instruction of the guard_compatible.
+        mc.MOV_rs(tmp, stack_arg)
+        mc.MOVSX8_rm(tmp, (tmp, 0))
+        mc.ADD_rs(tmp, stack_ret)
+        mc.MOV_mr((tmp, -WORD), regnum)
+        #
+        # Return condition code 'Zero' to mean 'found'.
+        mc.CMP_rr(regnum, regnum)
+        if IS_X86_32:
+            mc.POP_r(tmp)
+        mc.RET16_i(WORD)
 
     addr = mc.materialize(assembler.cpu, [])
     assembler._guard_compat_checkers[regnum] = addr
diff --git a/rpython/jit/backend/x86/rx86.py b/rpython/jit/backend/x86/rx86.py
--- a/rpython/jit/backend/x86/rx86.py
+++ b/rpython/jit/backend/x86/rx86.py
@@ -514,6 +514,8 @@
     XOR_ri,XOR_rr,XOR_rb,_,_,XOR_rm,XOR_rj,_,_,_,_ = common_modes(6)
     CMP_ri,CMP_rr,CMP_rb,CMP_bi,CMP_br,CMP_rm,CMP_rj,_,_,_,CMP_ri32 = common_modes(7)
 
+    ADD32_mi32 = insn(rex_nw, '\x81', mem_reg_plus_const(1), immediate(2))
+
     def ADD_ri(self, reg, immed):
         self.AD1_ri(reg, immed)
         if reg == R.esp:
diff --git a/rpython/jit/backend/x86/test/test_runner.py b/rpython/jit/backend/x86/test/test_runner.py
--- a/rpython/jit/backend/x86/test/test_runner.py
+++ b/rpython/jit/backend/x86/test/test_runner.py
@@ -13,6 +13,7 @@
 from rpython.jit.backend.test.runner_test import LLtypeBackendTest
 from rpython.jit.tool.oparser import parse
 import ctypes
+from hypothesis import strategies, given
 
 CPU = getcpuclass()
 
@@ -556,6 +557,52 @@
             assert self.cpu.get_int_value(deadframe, 2) == 42
             assert self.cpu.get_int_value(deadframe, 3) == 42
 
+    @given(strategies.integers(min_value=0, max_value=2),
+           strategies.integers(min_value=0),
+           strategies.lists(strategies.integers()))
+    def test_guard_compatible_extra(self, grow_position, update_asm, lst):
+        from rpython.jit.backend.x86 import guard_compat
+        saved = guard_compat.GROW_POSITION, guard_compat.UPDATE_ASM
+        try:
+            guard_compat.GROW_POSITION = grow_position
+            guard_compat.UPDATE_ASM = update_asm
+
+            t1_box, T1_box, d1 = self.alloc_instance(self.T)
+            faildescr1 = BasicFailDescr(1)
+            loop = parse("""
+            [p0]
+            guard_compatible(p0, ConstPtr(t1), descr=faildescr1) []
+            finish(p0, descr=fdescr)
+            """, namespace={'fdescr': BasicFinalDescr(2),
+                            'faildescr1': faildescr1,
+                            't1': t1_box._resref})
+            looptoken = JitCellToken()
+            self.cpu.compile_loop(loop.inputargs, loop.operations, looptoken)
+
+            def run(box):
+                deadframe = self.cpu.execute_token(looptoken,
+                                                   box._resref)
+                fail = self.cpu.get_latest_descr(deadframe)
+                return fail.identifier
+
+            choices = {0: t1_box}
+
+            for operation in lst:
+                if operation >= 0 or (-operation) in choices:
+                    if operation in choices:
+                        assert run(choices[operation]) == 2
+                    else:
+                        t2_box, T2_box, d2 = self.alloc_instance(self.T)
+                        assert run(t2_box) == 1
+                else:
+                    t2_box, T2_box, d2 = self.alloc_instance(self.T)
+                    self.cpu.grow_guard_compatible_switch(
+                        looptoken.compiled_loop_token,
+                        faildescr1, t2_box._resref)
+                    choices[-operation] = t2_box
+        finally:
+            guard_compat.GROW_POSITION, guard_compat.UPDATE_ASM = saved
+
 
 class TestDebuggingAssembler(object):
     def setup_method(self, meth):
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to