Author: Armin Rigo <[email protected]>
Branch: guard-compatible
Changeset: r84550:545a21ba6c23
Date: 2016-05-21 19:28 +0200
http://bitbucket.org/pypy/pypy/changeset/545a21ba6c23/
Log: Slow progress, mostly tweaking the design as I try it
diff --git a/rpython/jit/backend/x86/guard_compat.py
b/rpython/jit/backend/x86/guard_compat.py
--- a/rpython/jit/backend/x86/guard_compat.py
+++ b/rpython/jit/backend/x86/guard_compat.py
@@ -1,94 +1,100 @@
from rpython.rlib import rgc
from rpython.rlib.objectmodel import we_are_translated
+from rpython.rlib.rarithmetic import r_uint
from rpython.rtyper.lltypesystem import lltype, rffi
from rpython.jit.backend.x86.arch import WORD, IS_X86_32, IS_X86_64
from rpython.jit.backend.x86 import rx86, codebuf, valgrind
from rpython.jit.backend.x86.regloc import X86_64_SCRATCH_REG, imm, eax, edx
from rpython.jit.backend.llsupport.asmmemmgr import MachineDataBlockWrapper
+from rpython.jit.backend.llsupport.jitframe import GCMAP
from rpython.jit.metainterp.compile import GuardCompatibleDescr
from rpython.jit.metainterp.history import BasicFailDescr
#
# GUARD_COMPATIBLE(reg, const-ptr) produces the following assembler.
-# It uses a special version of the failure recovery stub code written
-# by generate_quick_failure(), which saves a few things at different
-# locations and then jumps to the tree-searching algo, as described
-# later. We also have the normal failure code at <failure_recovery>,
-# see below. In the following code, ofs(x) means the offset in the GC
-# table of the constant pointer 'x':
+# We also have the normal failure code at <failure_recovery>, which is
+# not put in the assembler but only in a field of the descr. In the
+# following code, ofs(x) means the offset in the GC table of the
+# pointer 'x':
#
# MOV reg2, [RIP + ofs(_backend_choices)]
# CMP reg, [reg2 + bc_most_recent]
-# JNE <failure_and_search_tree>
+# JNE slow_case
# JMP *[reg2 + bc_most_recent + 8]
-# sequel:
+# slow_case:
+# PUSH RAX # save
+# PUSH RDX # save
+# MOV RAX, reg # the value to search for
+# MOV RDX, reg2 # _backend_choices object
+# JMP search_tree # see below
+# sequel:
#
-# The faildescr for this guard is a GuardCompatibleDescr. The
-# '_backend_choices' (which is added as a field to
-# GuardCompatibleDescr only when not translated) has the following
-# structure:
+# The faildescr for this guard is a GuardCompatibleDescr. We add to
+# them a few fields:
+#
+# - _backend_choices_addr: points inside the GC table, to
+# ofs(_backend_choices)
+# - _backend_sequel_label: points to the <sequel> label
+# - _backend_failure_recovery: points to the <failure_recovery> label
+#
+# The '_backend_choices' object itself is a separate GC struct/array
+# with the following fields:
#
# - bc_gcmap: a copy of the gcmap at this point
# - bc_faildescr: a copy of the faildescr of that guard
# - bc_most_recent: 1 pair (gcref, asmaddr)
-# - bc_tree: N pairs (gcref, asmaddr)
+# - bc_list: N pairs (gcref, asmaddr) sorted according to gcref
#
-# The tree contains all items for which find_compatible() was called and
-# returned non-zero. It caches the non-zero result in 'asmaddr'. The
-# separate most_recent entry caches the last value seen, along with
-# the result of find_compatible(). If this find_compatible() returned
-# zero, then the cache entry contains the 'fail_guard' label below
-# as the 'asmaddr' value (such a value is never found inside the tree,
-# only in the most_recent entry).
+# It has a custom trace hook that keeps the bc_list sorted if the
+# gcrefs move, and ignores the tail of bc_list which contains the
+# invalid gcref of value -1.
#
-# The tree is a binary-search tree with a value at every node and leaf.
-# The length N of the array is equal to '2**D - 1', where D is the depth
-# of the tree. There are '2**(D-1) - 1' nodes and '2**(D-1)' leaves.
-# Trees start at D=1 and grows by one every time they need to be
-# reallocated. A tree of depth D has always all its nodes used, but
-# some leaves may be unused; such leaves store a pair (NULL, zero).
-# For now we assume that NULL is never received by guard_compatible().
+# Initially, the _backend_choices contains a list of length 1, and
+# both bc_most_recent and bc_list[0] contain the same pair (gcref,
+# sequel), where 'gcref' is the 2nd argument to guard_compatible() and
+# <sequel> is the address of the label above.
#
-# Tree organization: the root is at index 0. Starting at a node at
-# index i, the left child is at i+1, and the right child is at i+w,
-# where 'w' is computed as follows: start from the length of the whole
-# array; divide it by two and round the (non-integer) result upwards
-# for every level you see; you get 'w'. When you reach 'w=1', you are
-# at the level of leaves.
+# In general, the list can grow to contain all items for which
+# find_compatible() was called and returned non-zero. Every entry
+# caches the result in 'asmaddr'. The separate 'most_recent' entry
+# caches the last value seen, along with the result of
+# find_compatible(). If this find_compatible() returned zero, then
+# the cache entry contains the 'fail_guard' label below as the
+# 'asmaddr' value (such a value is never found inside bc_list, only in
+# bc_most_recent).
#
-# The special value 'asmaddr=-1' is replaced with the actual address
-# of the 'sequel' label above inside the tree, so that we don't have
-# to special-case it here. The special value 'asmaddr=0' in
-# 'most_recent' is replaced with the <failure_recovery> address
-# introduced above.
+# The list is sorted, so we can search it using binary search. The
+# length N of the list is equal to '2**D - 1', where D is the depth of
+# the tree algo. Lists start with 1 item (D=1) and grow to the next
+# power-of-two-minus-one every time they need to be reallocated. The
+# list is over-allocated, and the tail contains pairs (-1, ?), with -1
+# being the largest unsigned value (and never a valid GC pointer).
#
-# Here is the modified <failure_and_search_tree> code:
-#
-# PUSH RAX # save
-# PUSH RDX # save
-# MOV RAX, reg # the value to search for
-# MOV RDX, reg2 # _backend_choices object
-# JMP search_tree
+# When find_compatible() returns -1, we replace it with the address of
+# the 'sequel' label above, so that we don't have to special-case it
+# any more. When find_compatible() returns 0, it is not stored in the
+# list, but still stored in bc_most_recent, with the 0 replaced with
+# the <failure_recovery> address introduced above.
#
# Here is the x86-64 runtime code to walk the tree:
#
# search_tree:
# MOV [RSP+16], RDX # save
-# MOV R11, [RDX + bc_tree.length]
-# LEA RDX, [RDX + bc_tree.items]
-# JMP entry
+# MOV R11, [RDX + bc_list.length] # a power of two minus one
+# ADD RDX, $bc_list.items
+# JMP loop
+#
# right:
# LEA RDX, [RDX + 8*R11 + 8]
-# loop:
+# left:
# SHR R11, 1
# JZ not_found
-# entry:
-# CMP RAX, [RDX]
+# loop:
+# # search for the item at addresses between RDX and RDX+16*R11, included
+# CMP RAX, [RDX + 8*R11 - 8] # R11 = ...31, 15, 7, 3, 1
# JA right
-# JE found
-# ADD RDX, 16
-# JMP loop
+# JNE left
#
# found:
# MOV R11, [RDX + 8]
@@ -100,11 +106,11 @@
# JMP *R11
#
# not_found:
-# MOV RDX, [RSP+16]
+# <save all registers to the jitframe RBP,
+# reading and popping the original RAX and RDX off the stack>
+# MOV RDX, [RSP]
# MOV R11, [RDX + bc_gcmap]
# MOV [RBP + jf_gcmap], R11
-# <save all registers to the jitframe RBP,
-# reading and popping RAX and RDX off the stack>
# <call invoke_find_compatible(_backend_choices=RDX, value=RAX)>
# <_reload_frame_if_necessary>
# MOV R11, RAX
@@ -112,47 +118,99 @@
# JMP *R11
#
#
-# invoke_find_compatible(_backend_choices, value):
+# invoke_find_compatible(bchoices, new_gcref):
+# descr = bchoices.bc_faildescr
# try:
-# descr = _backend_choices.bc_faildescr
-# result = descr.find_compatible(cpu, value)
+# result = descr.find_compatible(cpu, new_gcref)
# if result == 0:
-# result = <failure_recovery>
+# result = descr._backend_failure_recovery
# else:
# if result == -1:
-# result = <sequel>
-# _backend_choices = add_in_tree(_backend_choices, value, result)
-# _backend_choices.bc_most_recent.gcref = value
-# _backend_choices.bc_most_recent.asmaddr = result
+# result = descr._backend_sequel_label
+# bchoices = add_in_tree(bchoices, new_gcref, result)
+# descr.bchoices_addr[0] = bchoices # GC table
+# bchoices.bc_most_recent.gcref = new_gcref
+# bchoices.bc_most_recent.asmaddr = result
# return result
# except: # oops!
-# return <failure_recovery>
+# return descr._backend_failure_recovery
+#
+# add_in_tree(bchoices, new_gcref, new_addr):
+# if bchoices.bc_list[len(bchoices.bc_list) - 1] != -1:
+# ...reallocate...
+# bchoices.bc_list[len(bchoices.bc_list) - 1].gcref = new_gcref
+# bchoices.bc_list[len(bchoices.bc_list) - 1].asmaddr = new_addr
+# quicksort(bchoices.bc_list)
+# return bchoices
+#
+# Other issues: compile_bridge() called on a GuardCompatibleDescr must
+# not to do any patching, but instead it needs to clear
+# bchoices.bc_most_recent. Otherwise, we will likely directly jump to
+# <failure_recovery> next time, if the newly added gcref is still in
+# bc_most_recent.gcref. (We can't add it to bc_most_recent or bc_list
+# from compile_bridge(), because we don't know what the gcref should
+# be, but it doesn't matter.)
#
# ____________________________________________________________
+PAIR = lltype.Struct('PAIR', ('gcref', llmemory.GCREF,
+ 'asmaddr', lltype.Signed))
+BACKEND_CHOICES = lltype.GcStruct('BACKEND_CHOICES',
+ ('bc_gcmap', lltype.Ptr(jitframe.GCMAP)),
+ ('bc_faildescr', llmemory.GCREF),
+ ('bc_most_recent', PAIR),
+ ('bc_list', lltype.Array(PAIR)))
-# uses the raw structure COMPATINFO, which is informally defined like this:
-# it starts with a negative 'small_ofs' value (see in the code)
-# then there is an array containing all the expected values that should pass
-# the guard, ending in -1.
+def invoke_find_compatible(bchoices, new_gcref):
+ descr = bchoices.bc_faildescr
+ try:
+ result = descr.find_compatible(cpu, new_gcref)
+ if result == 0:
+ result = descr._backend_failure_recovery
+ else:
+ if result == -1:
+ result = descr._backend_sequel_label
+ bchoices = add_in_tree(bchoices, new_gcref, result)
+ descr._backend_choices_addr[0] = bchoices # GC table
+ bchoices.bc_most_recent.gcref = new_gcref
+ bchoices.bc_most_recent.asmaddr = result
+ return result
+ except: # oops!
+ return descr._backend_failure_recovery
+def add_in_tree(bchoices, new_gcref, new_asmaddr):
+ length = len(bchoices.bc_list)
+ if bchoices.bc_list[length - 1] != -1:
+ # reallocate
+ new_bchoices = lltype.malloc(BACKEND_CHOICES, length * 2 + 1)
+ new_bchoices.bc_gcmap = bchoices.bc_gcmap
+ new_bchoices.bc_faildescr = bchoices.bc_faildescr
+ new_bchoices.bc_most_recent.gcref = bchoices.bc_most_recent.gcref
+ new_bchoices.bc_most_recent.asmaddr = bchoices.bc_most_recent.asmaddr
+ i = 0
+ while i < length:
+ new_bchoices.bc_list[i].gcref = bchoices.bc_list[i].gcref
+ new_bchoices.bc_list[i].asmaddr = bchoices.bc_list[i].asmaddr
+ i += 1
+ # fill the new pairs with the invalid gcref value -1
+ length *= 2
+ gcref_base = lltype.cast_opaque_ptr(llmemory.GCREF, new_bchoices)
+ ofs = (llmemory.offsetof(BACKEND_CHOICES, 'bc_list') +
+ llmemory.itemoffsetof(BACKEND_CHOICES.bc_list))
+ while i < length:
+ llop.raw_store(lltype.Void, gcref_base, ofs, r_uint(-1))
+ ofs += llmemory.sizeof(PAIR)
+ i += 1
+ #
+ bchoices.bc_list[length - 1].gcref = new_gcref
+ bchoices.bc_list[length - 1].asmaddr = new_addr
+ quicksort(bchoices)
+ return bchoices
-# --tweakable parameters (you get the effect closest to before we had
-# guard-compat by setting GROW_POSITION to 1 and UPDATE_ASM to 0)--
-# where grow_switch puts the new value:
-# 0 = at the beginning of the list
-# 1 = at position N-1, just before the initial value which stays last
-# 2 = at the end
-GROW_POSITION = 2
-# when guard_compatible's slow path is called and finds a value, when
-# should we update the machine code to make this value the fast-path?
-# 0 = never
-# another value = after about this many calls to the slow-path
-UPDATE_ASM = 1291
def generate_guard_compatible(assembler, guard_token, loc_reg, initial_value):
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit