Author: Armin Rigo <[email protected]> Branch: guard-compatible Changeset: r90011:f8ac0ceb09a5 Date: 2017-02-08 19:53 +0100 http://bitbucket.org/pypy/pypy/changeset/f8ac0ceb09a5/
Log: Update comments with a plan diff --git a/rpython/jit/backend/x86/guard_compat.py b/rpython/jit/backend/x86/guard_compat.py --- a/rpython/jit/backend/x86/guard_compat.py +++ b/rpython/jit/backend/x86/guard_compat.py @@ -9,38 +9,62 @@ # -# GUARD_COMPATIBLE(reg, const-ptr) produces the following assembler. -# We also have the normal failure code at <failure_recovery>, which is -# not put in the assembler but only in a field of the descr. In the -# following code, ofs(x) means the offset in the GC table of the -# pointer 'x': +# GUARD_COMPATIBLE(reg, const-ptr) produces the same assembler as +# a GUARD_VALUE. In the following code, ofs(x) means the offset in +# the GC table of the pointer 'x': # -# MOV reg2, [RIP + ofs(_backend_choices)] # LOAD_FROM_GC_TABLE -# CMP reg, [reg2 + bc_most_recent] +# MOV reg2, [RIP + ofs(const-ptr)] # LOAD_FROM_GC_TABLE +# CMP reg, reg2 +# JNE recovery_stub +# sequel: +# <reg2 not used any more> +# +# The difference is that 'recovery_stub' does not jump to one of the +# 'failure_recovery_code' versions, but instead it jumps to +# 'expand_guard_compatible'. The latter calls invoke_find_compatible. +# The result is one of: +# +# * 0: bail out. We jump to the 'failure_recovery_code'. +# +# * -1: continue running on the same path. We patch ofs(const-ptr) +# to contain the new value, and jump to 'sequel'. +# +# * otherwise, it's the address of a bridge. We jump to that bridge. +# +# This is the basic idea, but not the truth. Things are more +# complicated because we cache in the assembler the +# invoke_find_compatible call results. 'expand_guard_compatible' +# actually allocates a '_backend_choices' object, copies on it +# various data it got from the recovery_stub, then patches the +# recovery stub to this (the original recovery stub was padded if +# necessary to have enough room): +# +# recovery_stub: +# MOV R11, [RIP + ofs(_backend_choices)] +# CMP reg, [R11 + bc_most_recent] # JNE slow_case -# JMP *[reg2 + bc_most_recent + 8] +# JMP *[R11 + bc_most_recent + 8] # slow_case: -# PUSH RDX # save -# PUSH RAX # save -# MOV RDX=reg2, RAX=reg -# RDX is the _backend_choices object, RAX is the value to search for -# JMP search_tree # see below -# sequel: +# PUSH RAX # save the original value of RAX +# MOV RAX, reg # the value to search for +# JMP *[R11 + bc_search_tree] # x86-64: trick for a compact encoding # -# The faildescr for this guard is a GuardCompatibleDescr. We add to -# them a few fields: +# The faildescr for the GUARD_COMPATIBLE is a GuardCompatibleDescr. +# Fields relevant for this discussion: # -# - _backend_choices_addr: points inside the GC table, to -# ofs(_backend_choices) -# - _backend_sequel_label: points to the <sequel> label -# - _backend_failure_recovery: points to the <failure_recovery> label -# - _backend_gcmap: a copy of the gcmap at this point +# - _backend_ptr_addr: points inside the GC table, to ofs(const-ptr). +# ofs(_backend_choices) is just afterwards. +# Initially _backend_choices is NULL. +# - adr_jump_offset: raw address of the 'sequel' label (this field +# is the same as on any other GuardDescr) # -# The '_backend_choices' object itself is a separate GC struct/array -# with the following fields: +# The '_backend_choices' object itself, when allocated, is a separate +# GC struct/array with the following fields: # -# - bc_faildescr: a copy of the faildescr of that guard +# - bc_faildescr: a reference to the faildescr of that guard +# - bc_gcmap: a copy of the gcmap at this point # - bc_gc_table_tracer: only for a gc_writebarrier() +# - bc_search_tree: always the 'search_tree' label below # - bc_most_recent: 1 pair (gcref, asmaddr) # - bc_list: N pairs (gcref, asmaddr) sorted according to gcref # @@ -48,66 +72,70 @@ # gcrefs move, and ignores the tail of bc_list which contains the # invalid gcref of value -1. # -# Initially, the _backend_choices contains a list of length 1, and -# both bc_most_recent and bc_list[0] contain the same pair (gcref, -# sequel), where 'gcref' is the 2nd argument to guard_compatible() and -# <sequel> is the address of the label above. -# -# In general, the list can grow to contain all items for which -# find_compatible() was called and returned non-zero. Every entry -# caches the result in 'asmaddr'. The separate 'most_recent' entry -# caches the last value seen, along with the result of -# find_compatible(). If this find_compatible() returned zero, then -# the cache entry contains the 'fail_guard' label below as the -# 'asmaddr' value (such a value is never found inside bc_list, only in -# bc_most_recent). +# The bc_list can grow to contain all items for which find_compatible() +# was called and returned non-zero. Every entry caches the result in +# 'asmaddr'. The separate 'most_recent' entry caches the last value +# seen, along with the result of find_compatible(). # # The list is sorted, so we can search it using binary search. The # length N of the list is equal to '2**D - 1', where D is the depth of -# the tree algo. Lists start with 1 item (D=1) and grow to the next +# the tree algo. Lists start with room for 3 items (D=2) and grow to the next # power-of-two-minus-one every time they need to be reallocated. The # list is over-allocated, and the tail contains pairs (-1, ?), with -1 # being the largest unsigned value (and never a valid GC pointer). # -# When find_compatible() returns -1, we replace it with the address of -# the 'sequel' label above, so that we don't have to special-case it -# any more. When find_compatible() returns 0, it is not stored in the -# list, but still stored in bc_most_recent, with the 0 replaced with -# the <failure_recovery> address introduced above. +# When find_compatible() returns -1, we save instead the address of +# 'sequel' in 'asmaddr'. We could also patch ofs(const-ptr) so that the +# fastest path applies if the same value is seen the next time; it +# should be measured if it changes anything or not, because such a +# patching occurs only once (i.e. 'search_tree' will not patch it +# again). +# +# When find_compatible() returns 0, it is not stored in bc_list, +# but still stored in bc_most_recent, with 'guard_compat_recovery' +# as the 'asmaddr'. Here is 'guard_compat_recovery': it emulates +# generate_quick_failure() from assembler.py, and so it plays the role +# of the original (patched) recovery stub. +# +# guard_compat_recovery: +# PUSH R11 +# PUSH [R11 + bc_gcmap] +# JMP target # # Here is the x86-64 runtime code to walk the tree: # # search_tree: -# MOV [RSP+16], RDX # save -# MOV R11, [RDX + bc_list.length] # a power of two minus one -# ADD RDX, $bc_list.items +# MOV [ESP+8], RCX # save the original value +# MOV [ESP+16], R11 # save the _backend_choices object +# MOV RCX, [R11 + bc_list.length] # a power of two minus one +# ADD R11, $bc_list.items # JMP loop # # right: -# LEA RDX, [RDX + 8*R11 + 8] +# LEA R11, [R11 + 8*RCX + 8] # left: -# SHR R11, 1 +# SHR RCX, 1 # JZ not_found # loop: -# # search for the item at addresses between RDX and RDX+16*R11, included -# # (note that R11 is always odd here; even though we use 8*R11 in the +# # search for the item at addresses between R11 and R11+16*RCX, included +# # (note that RCX is always odd here; even though we use 8*RCX in the # # following instruction, we're really accessing 16-bytes-sized items) -# CMP RAX, [RDX + 8*R11 - 8] # R11 = ...31, 15, 7, 3, 1 +# CMP RAX, [R11 + 8*RCX - 8] # RCX = ...31, 15, 7, 3, 1 # JA right # JNE left # # found: -# MOV R11, [RDX + 8*R11] -# MOV RDX, [RSP+16] -# MOV [RDX + bc_most_recent], RAX -# MOV [RDX + bc_most_recent + 8], R11 -# POP RAX -# POP RDX +# MOV R11, [R11 + 8*RCX] # address to jump to next +# MOV RCX, [ESP+16] # reload the _backend_choices object +# MOV [RCX + bc_most_recent], RAX +# MOV [RCX + bc_most_recent + 8], R11 +# MOV RCX, [ESP+8] # restore saved value +# POP RAX # pushed by the caller # JMP *R11 # # not_found: # <save all registers to the jitframe RBP, -# reading and popping the original RAX and RDX off the stack> +# reading and popping the original RAX and RCX off the stack> # <call invoke_find_compatible(_backend_choices=[RSP], value=RAX), # jitframe=RBP> # <_reload_frame_if_necessary> @@ -147,25 +175,6 @@ # # ____________________________________________________________ -# Possible optimization: GUARD_COMPATIBLE(reg, const-ptr) could emit -# first assembler that is similar to a GUARD_VALUE. As soon as a -# second case is seen, this assembler is patched (once) to turn it -# into the general switching version above. The entry in the GC table -# at ofs(_backend_choices) starts with the regular const-ptr, and the -# BACKEND_CHOICES object is only allocated when the assembler is -# patched. The original assembler can be similar to a GUARD_VALUE: -# -# MOV reg2, [RIP + ofs(const-ptr)] # == ofs(_backend_choices) -# CMP reg, reg2 -# JE sequel -# PUSH [RIP + ofs(guard_compatible_descr)] -# JMP guard_compat_second_case -# <padding to make the code large enough for patching> -# <ends with one byte which tells the size of this block> -# sequel: -# -# ____________________________________________________________ - # A lot of the logic is not specific to the x86 backend and is # written in ../llsupport/guard_compat.py. _______________________________________________ pypy-commit mailing list [email protected] https://mail.python.org/mailman/listinfo/pypy-commit
