Author: Armin Rigo <[email protected]>
Branch: guard-compatible
Changeset: r84571:d0ffd51b5d0b
Date: 2016-05-22 16:41 +0200
http://bitbucket.org/pypy/pypy/changeset/d0ffd51b5d0b/
Log: Implement search_tree
diff --git a/rpython/jit/backend/x86/guard_compat.py b/rpython/jit/backend/x86/guard_compat.py
--- a/rpython/jit/backend/x86/guard_compat.py
+++ b/rpython/jit/backend/x86/guard_compat.py
@@ -3,11 +3,14 @@
from rpython.rlib.rarithmetic import r_uint
from rpython.rtyper.lltypesystem import lltype, llmemory, rffi
from rpython.rtyper.lltypesystem.lloperation import llop
-from rpython.rtyper.annlowlevel import cast_instance_to_gcref
+from rpython.rtyper.annlowlevel import cast_instance_to_gcref, llhelper
from rpython.rtyper.annlowlevel import cast_gcref_to_instance
from rpython.translator.tool.cbuild import ExternalCompilationInfo
+from rpython.jit.metainterp.compile import GuardCompatibleDescr
from rpython.jit.backend.llsupport import jitframe
-from rpython.jit.metainterp.compile import GuardCompatibleDescr
+from rpython.jit.backend.x86 import rx86, codebuf, regloc
+from rpython.jit.backend.x86.regalloc import gpr_reg_mgr_cls
+from rpython.jit.backend.x86.arch import WORD, DEFAULT_FRAME_BYTES
#
@@ -22,10 +25,10 @@
# JNE slow_case
# JMP *[reg2 + bc_most_recent + 8]
# slow_case:
+# PUSH RDX # save
# PUSH RAX # save
-# PUSH RDX # save
-# MOV RAX, reg # the value to search for
-# MOV RDX, reg2 # _backend_choices object
+# MOV RDX=reg2, RAX=reg
+# RDX is the _backend_choices object, RAX is the value to search for
# JMP search_tree # see below
# sequel:
#
@@ -96,7 +99,7 @@
# JNE left
#
# found:
-# MOV R11, [RDX + 8]
+# MOV R11, [RDX + 8*R11]
# MOV RDX, [RSP+16]
# MOV [RDX + bc_most_recent], RAX
# MOV [RDX + bc_most_recent + 8], R11
@@ -107,10 +110,10 @@
# not_found:
# <save all registers to the jitframe RBP,
# reading and popping the original RAX and RDX off the stack>
-# MOV RDX, [RSP]
-# MOV R11, [RDX + bc_gcmap]
+# MOV RDI, [RSP]
+# MOV R11, [RDI + bc_gcmap]
# MOV [RBP + jf_gcmap], R11
-# <call invoke_find_compatible(_backend_choices=RDX, value=RAX)>
+# <call invoke_find_compatible(_backend_choices=RDI, value=RAX)>
# <_reload_frame_if_necessary>
# MOV R11, RAX
# <restore the non-saved registers>
@@ -161,9 +164,13 @@
('bc_most_recent', PAIR),
('bc_list', lltype.Array(PAIR)))
-@specialize.memo()
-def getofs(name):
+def _getofs(name):
return llmemory.offsetof(BACKEND_CHOICES, name)
+BCGCMAP = _getofs('bc_gcmap')
+BCFAILDESCR = _getofs('bc_faildescr')
+BCMOSTRECENT = _getofs('bc_most_recent')
+BCLIST = _getofs('bc_list')
+del _getofs
BCLISTLENGTHOFS = llmemory.arraylengthoffset(BACKEND_CHOICES.bc_list)
BCLISTITEMSOFS = llmemory.itemoffsetof(BACKEND_CHOICES.bc_list, 0)
PAIRSIZE = llmemory.sizeof(PAIR)
@@ -180,10 +187,10 @@
return old != new
def bchoices_trace(gc, obj_addr, callback, arg):
- gc._trace_callback(callback, arg, obj_addr + getofs('bc_faildescr'))
- bchoices_pair(gc, obj_addr + getofs('bc_most_recent'), callback, arg)
- length = (obj_addr + getofs('bc_list') + BCLISTLENGTHOFS).signed[0]
- array_addr = obj_addr + getofs('bc_list') + BCLISTITEMSOFS
+ gc._trace_callback(callback, arg, obj_addr + BCFAILDESCR)
+ bchoices_pair(gc, obj_addr + BCMOSTRECENT, callback, arg)
+ length = (obj_addr + BCLIST + BCLISTLENGTHOFS).signed[0]
+ array_addr = obj_addr + BCLIST + BCLISTITEMSOFS
item_addr = array_addr
i = 0
changes = False
@@ -219,10 +226,15 @@
compilation_info=eci)
+INVOKE_FIND_COMPATIBLE_FUNC = lltype.Ptr(lltype.FuncType(
+ [lltype.Ptr(BACKEND_CHOICES), llmemory.GCREF],
+ lltype.Signed))
+
def invoke_find_compatible(bchoices, new_gcref):
descr = bchoices.bc_faildescr
descr = cast_gcref_to_instance(GuardCompatibleDescr, descr)
try:
+ xxx # temp
result = descr.find_compatible(cpu, new_gcref)
if result == 0:
result = descr._backend_failure_recovery
@@ -235,6 +247,9 @@
bchoices.bc_most_recent.asmaddr = result
return result
except: # oops!
+ if not we_are_translated():
+ import sys, pdb
+ pdb.post_mortem(sys.exc_info()[2])
return descr._backend_failure_recovery
def add_in_tree(bchoices, new_gcref, new_asmaddr):
@@ -242,7 +257,7 @@
length = len(bchoices.bc_list)
#
gcref_base = lltype.cast_opaque_ptr(llmemory.GCREF, bchoices)
- ofs = getofs('bc_list') + BCLISTITEMSOFS
+ ofs = BCLIST + BCLISTITEMSOFS
ofs += (length - 1) * llmemory.sizeof(PAIR)
ofs = _real_number(ofs)
if llop.raw_load(lltype.Unsigned, gcref_base, ofs) != r_uint(-1):
@@ -273,7 +288,7 @@
bchoices.bc_list[length - 1].asmaddr = new_asmaddr
# --- no GC above ---
addr = llmemory.cast_ptr_to_adr(bchoices)
- addr += getofs('bc_list') + BCLISTITEMSOFS
+ addr += BCLIST + BCLISTITEMSOFS
pairs_quicksort(addr, length)
return bchoices
@@ -307,11 +322,98 @@
def invalidate_cache(bchoices):
"""Write -1 inside bchoices.bc_most_recent.gcref."""
- ofs = llmemory.offsetof(BACKEND_CHOICES, 'bc_most_recent')
- invalidate_pair(bchoices, ofs)
+ invalidate_pair(bchoices, BCMOSTRECENT)
+def _fix_forward_label(mc, jmp_location):
+ offset = mc.get_relative_pos() - jmp_location
+ assert 0 < offset <= 127
+ mc.overwrite(jmp_location-1, chr(offset))
+def setup_once(assembler):
+ rax = regloc.eax.value
+ rdx = regloc.edx.value
+ rdi = regloc.edi.value
+ r11 = regloc.r11.value
+ frame_size = DEFAULT_FRAME_BYTES + 2 * WORD
+ # contains two extra words on the stack:
+ # - saved RDX
+ # - saved RAX
+
+ mc = codebuf.MachineCodeBlockWrapper()
+ mc.force_frame_size(frame_size)
+
+ ofs1 = _real_number(BCLIST + BCLISTLENGTHOFS)
+ ofs2 = _real_number(BCLIST + BCLISTITEMSOFS)
+ mc.MOV_sr(16, rdx) # MOV [RSP+16], RDX
+ mc.MOV_rm(r11, (rdx, ofs1)) # MOV R11, [RDX + bc_list.length]
+ mc.ADD_ri(rdx, ofs2) # ADD RDX, $bc_list.items
+ mc.JMP_l8(0) # JMP loop
+ jmp_location = mc.get_relative_pos()
+ mc.force_frame_size(frame_size)
+
+ right_label = mc.get_relative_pos()
+ mc.LEA_ra(rdx, (rdx, r11, 3, 8)) # LEA RDX, [RDX + 8*R11 + 8]
+ left_label = mc.get_relative_pos()
+ mc.SHR_ri(r11, 1) # SHR R11, 1
+ mc.J_il8(rx86.Conditions['Z'], 0) # JZ not_found
+ jz_location = mc.get_relative_pos()
+
+ _fix_forward_label(mc, jmp_location) # loop:
+ mc.CMP_ra(rax, (rdx, r11, 3, -8)) # CMP RAX, [RDX + 8*R11 - 8]
+ mc.J_il8(rx86.Conditions['A'], right_label - (mc.get_relative_pos() + 2))
+ mc.J_il8(rx86.Conditions['NE'], left_label - (mc.get_relative_pos() + 2))
+
+ mc.MOV_ra(r11, (rdx, r11, 3, 0)) # MOV R11, [RDX + 8*R11]
+ mc.MOV_rs(rdx, 16) # MOV RDX, [RSP+16]
+ ofs = _real_number(BCMOSTRECENT)
+ mc.MOV_mr((rdx, ofs), rax) # MOV [RDX+bc_most_recent], RAX
+ mc.MOV_mr((rdx, ofs + 8), r11) # MOV [RDX+bc_most_recent+8], R11
+ mc.POP_r(rax) # POP RAX
+ mc.POP_r(rdx) # POP RDX
+ mc.JMP_r(r11) # JMP *R11
+ mc.force_frame_size(frame_size)
+
+ _fix_forward_label(mc, jz_location) # not_found:
+
+ # read and pop the original RAX and RDX off the stack
+ base_ofs = assembler.cpu.get_baseofs_of_frame_field()
+ v = gpr_reg_mgr_cls.all_reg_indexes[rdx]
+ mc.POP_b(v * WORD + base_ofs) # POP [RBP + saved_rdx]
+ v = gpr_reg_mgr_cls.all_reg_indexes[rax]
+ mc.POP_b(v * WORD + base_ofs) # POP [RBP + saved_rax]
+ # save all other registers to the jitframe RBP
+ assembler._push_all_regs_to_frame(mc, [regloc.eax, regloc.edx],
+ withfloats=True)
+
+ bc_gcmap = _real_number(BCGCMAP)
+ jf_gcmap = assembler.cpu.get_ofs_of_frame_field('jf_gcmap')
+ mc.MOV_rs(rdi, 0) # MOV RDI, [RSP]
+ mc.MOV_rr(regloc.esi.value, rax) # MOV RSI, RAX
+ mc.MOV_rm(r11, (rdi, bc_gcmap)) # MOV R11, [RDI + bc_gcmap]
+ mc.MOV_br(jf_gcmap, r11) # MOV [RBP + jf_gcmap], R11
+ llfunc = llhelper(INVOKE_FIND_COMPATIBLE_FUNC, invoke_find_compatible)
+ llfunc = assembler.cpu.cast_ptr_to_int(llfunc)
+ mc.CALL(regloc.imm(llfunc)) # CALL invoke_find_compatible
+ assembler._reload_frame_if_necessary(mc)
+ mc.MOV_bi(jf_gcmap, 0) # MOV [RBP + jf_gcmap], 0
+
+ mc.MOV_rr(r11, rax) # MOV R11, RAX
+
+ # restore the registers that the CALL has clobbered. The other
+ # registers are saved above, for the gcmap, but don't need to be
+ # restored here. (We restore RAX and RDX too.)
+ assembler._pop_all_regs_from_frame(mc, [], withfloats=True,
+ callee_only=True)
+ mc.JMP_r(r11) # JMP *R11
+
+ assembler.guard_compat_search_tree = mc.materialize(assembler.cpu, [])
+
+
+
+
+
+# ::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
def generate_guard_compatible(assembler, guard_token, loc_reg, initial_value):
# fast-path check
@@ -424,11 +526,6 @@
# guard_compatible to update it if needed.
-def setup_once(assembler):
- nb_registers = WORD * 2
- assembler._guard_compat_checkers = [0] * nb_registers
-
-
def _build_inner_loop(mc, regnum, tmp, immediate_return):
pos = mc.get_relative_pos()
mc.CMP_mr((tmp, WORD), regnum)
diff --git a/rpython/jit/backend/x86/rx86.py b/rpython/jit/backend/x86/rx86.py
--- a/rpython/jit/backend/x86/rx86.py
+++ b/rpython/jit/backend/x86/rx86.py
@@ -396,6 +396,8 @@
INSN_rb = insn(rex_w, chr(base+3), register(1,8), stack_bp(2))
INSN_rs = insn(rex_w, chr(base+3), register(1,8), stack_sp(2))
INSN_rm = insn(rex_w, chr(base+3), register(1,8), mem_reg_plus_const(2))
+ INSN_ra = insn(rex_w, chr(base+3), register(1,8),
+ mem_reg_plus_scaled_reg_plus_const(2))
INSN_rj = insn(rex_w, chr(base+3), register(1,8), abs_(2))
INSN_ji8 = insn(rex_w, '\x83', orbyte(base), abs_(1), immediate(2,'b'))
INSN_mi8 = insn(rex_w, '\x83', orbyte(base), mem_reg_plus_const(1),
@@ -418,7 +420,7 @@
INSN_bi._always_inline_ = True # try to constant-fold single_byte()
return (INSN_ri, INSN_rr, INSN_rb, INSN_bi, INSN_br, INSN_rm, INSN_rj,
- INSN_ji8, INSN_mi8, INSN_rs, INSN_ri32)
+ INSN_ji8, INSN_mi8, INSN_rs, INSN_ri32, INSN_ra)
def select_8_or_32_bit_immed(insn_8, insn_32):
def INSN(*args):
@@ -514,13 +516,13 @@
INC_m = insn(rex_w, '\xFF', orbyte(0), mem_reg_plus_const(1))
INC_j = insn(rex_w, '\xFF', orbyte(0), abs_(1))
- AD1_ri,ADD_rr,ADD_rb,_,_,ADD_rm,ADD_rj,_,_,ADD_rs, _ = common_modes(0)
- OR_ri, OR_rr, OR_rb, _,_,OR_rm, OR_rj, _,_,_,_ = common_modes(1)
- AND_ri,AND_rr,AND_rb,_,_,AND_rm,AND_rj,_,_,_,_ = common_modes(4)
- SU1_ri,SUB_rr,SUB_rb,_,_,SUB_rm,SUB_rj,SUB_ji8,SUB_mi8,_,_ =
common_modes(5)
- SBB_ri,SBB_rr,SBB_rb,_,_,SBB_rm,SBB_rj,_,_,_,_ = common_modes(3)
- XOR_ri,XOR_rr,XOR_rb,_,_,XOR_rm,XOR_rj,_,_,_,_ = common_modes(6)
- CMP_ri,CMP_rr,CMP_rb,CMP_bi,CMP_br,CMP_rm,CMP_rj,_,_,_,CMP_ri32 =
common_modes(7)
+ AD1_ri,ADD_rr,ADD_rb,_,_,ADD_rm,ADD_rj,_,_,ADD_rs,_,_ = common_modes(0)
+ OR_ri, OR_rr, OR_rb, _,_,OR_rm, OR_rj, _,_,_,_,_ = common_modes(1)
+ AND_ri,AND_rr,AND_rb,_,_,AND_rm,AND_rj,_,_,_,_,_ = common_modes(4)
+ SU1_ri,SUB_rr,SUB_rb,_,_,SUB_rm,SUB_rj,SUB_ji8,SUB_mi8,_,_,_ =
common_modes(5)
+ SBB_ri,SBB_rr,SBB_rb,_,_,SBB_rm,SBB_rj,_,_,_,_,_ = common_modes(3)
+ XOR_ri,XOR_rr,XOR_rb,_,_,XOR_rm,XOR_rj,_,_,_,_,_ = common_modes(6)
+ CMP_ri,CMP_rr,CMP_rb,CMP_bi,CMP_br,CMP_rm,CMP_rj,_,_,_,CMP_ri32,CMP_ra =
common_modes(7)
ADD32_mi32 = insn(rex_nw, '\x81', mem_reg_plus_const(1), immediate(2))
diff --git a/rpython/rtyper/lltypesystem/ll2ctypes.py b/rpython/rtyper/lltypesystem/ll2ctypes.py
--- a/rpython/rtyper/lltypesystem/ll2ctypes.py
+++ b/rpython/rtyper/lltypesystem/ll2ctypes.py
@@ -936,6 +936,9 @@
elif isinstance(llobj, llmemory.ArrayItemsOffset):
CARRAY = get_ctypes_type(llobj.TYPE)
llobj = CARRAY.items.offset
+ elif isinstance(llobj, llmemory.ArrayLengthOffset):
+ CARRAY = get_ctypes_type(llobj.TYPE)
+ llobj = CARRAY.length.offset
else:
raise NotImplementedError(llobj) # don't know about symbolic value
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit