Author: Armin Rigo <[email protected]>
Branch: guard-compatible
Changeset: r83066:0cbabc844652
Date: 2016-03-15 10:08 +0100
http://bitbucket.org/pypy/pypy/changeset/0cbabc844652/
Log: Add more flexibility with (compilation-time) tweakable parameters
diff --git a/rpython/jit/backend/test/runner_test.py b/rpython/jit/backend/test/runner_test.py
--- a/rpython/jit/backend/test/runner_test.py
+++ b/rpython/jit/backend/test/runner_test.py
@@ -217,7 +217,7 @@
self.cpu.grow_guard_compatible_switch(looptoken.compiled_loop_token,
faildescr1, t2_box._resref)
- for retry in range(2):
+ for retry in range(5):
deadframe = self.cpu.execute_token(looptoken,
t2_box._resref)
fail = self.cpu.get_latest_descr(deadframe)
diff --git a/rpython/jit/backend/x86/guard_compat.py b/rpython/jit/backend/x86/guard_compat.py
--- a/rpython/jit/backend/x86/guard_compat.py
+++ b/rpython/jit/backend/x86/guard_compat.py
@@ -15,6 +15,22 @@
# the guard, ending in -1.
+# --tweakable parameters (to get the behavior closest to what we had before
+# guard-compat, set GROW_POSITION to 1 and UPDATE_ASM to 0)--
+
+# where grow_switch puts the new value:
+# 0 = at the beginning of the list
+# 1 = at position N-1, just before the initial value which stays last
+# 2 = at the end
+GROW_POSITION = 2
+
+# when guard_compatible's slow path is called and finds the value, at which
+# point should we patch the machine code to make this value the fast-path?
+# 0 = never
+# N > 0 = after about N calls to the slow-path
+UPDATE_ASM = 1291
+
+
def generate_guard_compatible(assembler, guard_token, loc_reg, initial_value):
# fast-path check
mc = assembler.mc
@@ -49,9 +65,10 @@
mc.stack_frame_size_delta(-WORD)
small_ofs = rel_pos_compatible_imm - mc.get_relative_pos()
- compatinfo[0] = small_ofs
+ assert -128 <= small_ofs < 128
+ compatinfo[0] = small_ofs & 0xFF
- assembler.guard_success_cc = rx86.Conditions['NZ']
+ assembler.guard_success_cc = rx86.Conditions['Z']
assembler.implement_guard(guard_token)
#
# patch the JE above
@@ -99,10 +116,22 @@
newcompatinfo = rffi.cast(rffi.SIGNEDP, newcompatinfoaddr)
newcompatinfo[0] = compatinfo[0]
- newcompatinfo[1] = new_value
- for i in range(1, length):
- newcompatinfo[i + 1] = compatinfo[i]
+ if GROW_POSITION == 0:
+ newcompatinfo[1] = new_value
+ for i in range(1, length):
+ newcompatinfo[i + 1] = compatinfo[i]
+ elif GROW_POSITION == 1:
+ for i in range(1, length - 2):
+ newcompatinfo[i] = compatinfo[i]
+ newcompatinfo[length - 2] = new_value
+ newcompatinfo[length - 1] = compatinfo[length - 2]
+ newcompatinfo[length] = -1 # == compatinfo[length - 1]
+ else:
+ for i in range(1, length - 1):
+ newcompatinfo[i] = compatinfo[i]
+ newcompatinfo[length - 1] = new_value
+ newcompatinfo[length] = -1 # == compatinfo[length - 1]
# the old 'compatinfo' is not used any more, but will only be freed
# when the looptoken is freed
@@ -117,6 +146,36 @@
assembler._guard_compat_checkers = [0] * nb_registers
+def _build_inner_loop(mc, regnum, tmp, immediate_return):
+ pos = mc.get_relative_pos()
+ mc.CMP_mr((tmp, WORD), regnum)
+ mc.J_il8(rx86.Conditions['E'], 0) # patched below
+ je_location = mc.get_relative_pos()
+ mc.CMP_mi((tmp, WORD), -1)
+ mc.LEA_rm(tmp, (tmp, WORD))
+ mc.J_il8(rx86.Conditions['NE'], pos - (mc.get_relative_pos() + 2))
+ #
+ # not found! Return the condition code 'Not Zero' to mean 'not found'.
+ mc.OR_rr(tmp, tmp)
+ #
+ # if 'immediate_return', patch the JE above to jump here. When we
+ # follow that path, we get condition code 'Zero', which means 'found'.
+ if immediate_return:
+ offset = mc.get_relative_pos() - je_location
+ assert 0 < offset <= 127
+ mc.overwrite(je_location-1, chr(offset))
+ #
+ if IS_X86_32:
+ mc.POP_r(tmp)
+ mc.RET16_i(WORD)
+ mc.force_frame_size(8) # one word on X86_64, two words on X86_32
+ #
+ # if not 'immediate_return', patch the JE above to jump here.
+ if not immediate_return:
+ offset = mc.get_relative_pos() - je_location
+ assert 0 < offset <= 127
+ mc.overwrite(je_location-1, chr(offset))
+
def get_or_build_checker(assembler, regnum):
"""Returns a piece of assembler that checks if the value is in
some array (there is one such piece per input register 'regnum')
@@ -142,40 +201,43 @@
mc.MOV_rs(tmp, stack_arg)
- pos = mc.get_relative_pos()
- mc.CMP_mr((tmp, WORD), regnum)
- mc.J_il8(rx86.Conditions['E'], 0) # patched below
- je_location = mc.get_relative_pos()
- mc.CMP_mi((tmp, WORD), -1)
- mc.LEA_rm(tmp, (tmp, WORD))
- mc.J_il8(rx86.Conditions['NE'], pos - (mc.get_relative_pos() + 2))
+ if UPDATE_ASM > 0:
+ CONST_TO_ADD = int((1 << 24) / (UPDATE_ASM + 0.3))
+ if CONST_TO_ADD >= (1 << 23):
+ CONST_TO_ADD = (1 << 23) - 1
+ if CONST_TO_ADD < 1:
+ CONST_TO_ADD = 1
+ CONST_TO_ADD <<= 8
+ #
+ mc.ADD32_mi32((tmp, 0), CONST_TO_ADD)
+ mc.J_il8(rx86.Conditions['C'], 0) # patched below
+ jc_location = mc.get_relative_pos()
+ else:
+ jc_location = -1
- # not found! The condition code is already 'Zero', which we return
- # to mean 'not found'.
- if IS_X86_32:
- mc.POP_r(tmp)
- mc.RET16_i(WORD)
+ _build_inner_loop(mc, regnum, tmp, immediate_return=True)
- mc.force_frame_size(8) # one word on X86_64, two words on X86_32
-
- # patch the JE above
- offset = mc.get_relative_pos() - je_location
- assert 0 < offset <= 127
- mc.overwrite(je_location-1, chr(offset))
-
- # found! update the assembler by writing the value at 'small_ofs'
- # bytes before our return address. This should overwrite the const in
- # 'MOV_ri64(r11, const)', first instruction of the guard_compatible.
- mc.MOV_rs(tmp, stack_arg)
- mc.MOV_rm(tmp, (tmp, 0))
- mc.ADD_rs(tmp, stack_ret)
- mc.MOV_mr((tmp, -WORD), regnum)
-
- # the condition codes say 'Not Zero', as a result of the ADD above.
- # Return this condition code to mean 'found'.
- if IS_X86_32:
- mc.POP_r(tmp)
- mc.RET16_i(WORD)
+ if jc_location != -1:
+ # patch the JC above
+ offset = mc.get_relative_pos() - jc_location
+ assert 0 < offset <= 127
+ mc.overwrite(jc_location-1, chr(offset))
+ #
+ _build_inner_loop(mc, regnum, tmp, immediate_return=False)
+ #
+ # found! update the assembler by writing the value at 'small_ofs'
+ # bytes before our return address. This should overwrite the const in
+ # 'MOV_ri64(r11, const)', first instruction of the guard_compatible.
+ mc.MOV_rs(tmp, stack_arg)
+ mc.MOVSX8_rm(tmp, (tmp, 0))
+ mc.ADD_rs(tmp, stack_ret)
+ mc.MOV_mr((tmp, -WORD), regnum)
+ #
+ # Return condition code 'Zero' to mean 'found'.
+ mc.CMP_rr(regnum, regnum)
+ if IS_X86_32:
+ mc.POP_r(tmp)
+ mc.RET16_i(WORD)
addr = mc.materialize(assembler.cpu, [])
assembler._guard_compat_checkers[regnum] = addr
diff --git a/rpython/jit/backend/x86/rx86.py b/rpython/jit/backend/x86/rx86.py
--- a/rpython/jit/backend/x86/rx86.py
+++ b/rpython/jit/backend/x86/rx86.py
@@ -514,6 +514,8 @@
XOR_ri,XOR_rr,XOR_rb,_,_,XOR_rm,XOR_rj,_,_,_,_ = common_modes(6)
CMP_ri,CMP_rr,CMP_rb,CMP_bi,CMP_br,CMP_rm,CMP_rj,_,_,_,CMP_ri32 =
common_modes(7)
+ ADD32_mi32 = insn(rex_nw, '\x81', mem_reg_plus_const(1), immediate(2))
+
def ADD_ri(self, reg, immed):
self.AD1_ri(reg, immed)
if reg == R.esp:
diff --git a/rpython/jit/backend/x86/test/test_runner.py b/rpython/jit/backend/x86/test/test_runner.py
--- a/rpython/jit/backend/x86/test/test_runner.py
+++ b/rpython/jit/backend/x86/test/test_runner.py
@@ -13,6 +13,7 @@
from rpython.jit.backend.test.runner_test import LLtypeBackendTest
from rpython.jit.tool.oparser import parse
import ctypes
+from hypothesis import strategies, given
CPU = getcpuclass()
@@ -556,6 +557,52 @@
assert self.cpu.get_int_value(deadframe, 2) == 42
assert self.cpu.get_int_value(deadframe, 3) == 42
+ @given(strategies.integers(min_value=0, max_value=2),
+ strategies.integers(min_value=0),
+ strategies.lists(strategies.integers()))
+ def test_guard_compatible_extra(self, grow_position, update_asm, lst):
+ from rpython.jit.backend.x86 import guard_compat
+ saved = guard_compat.GROW_POSITION, guard_compat.UPDATE_ASM
+ try:
+ guard_compat.GROW_POSITION = grow_position
+ guard_compat.UPDATE_ASM = update_asm
+
+ t1_box, T1_box, d1 = self.alloc_instance(self.T)
+ faildescr1 = BasicFailDescr(1)
+ loop = parse("""
+ [p0]
+ guard_compatible(p0, ConstPtr(t1), descr=faildescr1) []
+ finish(p0, descr=fdescr)
+ """, namespace={'fdescr': BasicFinalDescr(2),
+ 'faildescr1': faildescr1,
+ 't1': t1_box._resref})
+ looptoken = JitCellToken()
+ self.cpu.compile_loop(loop.inputargs, loop.operations, looptoken)
+
+ def run(box):
+ deadframe = self.cpu.execute_token(looptoken,
+ box._resref)
+ fail = self.cpu.get_latest_descr(deadframe)
+ return fail.identifier
+
+ choices = {0: t1_box}
+
+ for operation in lst:
+ if operation >= 0 or (-operation) in choices:
+ if operation in choices:
+ assert run(choices[operation]) == 2
+ else:
+ t2_box, T2_box, d2 = self.alloc_instance(self.T)
+ assert run(t2_box) == 1
+ else:
+ t2_box, T2_box, d2 = self.alloc_instance(self.T)
+ self.cpu.grow_guard_compatible_switch(
+ looptoken.compiled_loop_token,
+ faildescr1, t2_box._resref)
+ choices[-operation] = t2_box
+ finally:
+ guard_compat.GROW_POSITION, guard_compat.UPDATE_ASM = saved
+
class TestDebuggingAssembler(object):
def setup_method(self, meth):
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit