Author: Armin Rigo <[email protected]>
Branch:
Changeset: r44685:5a240a694bb8
Date: 2011-06-04 07:24 +0200
http://bitbucket.org/pypy/pypy/changeset/5a240a694bb8/
Log: Merge jit-write-barrier-from-array:
* in case the JIT generates a SETARRAYITEM_GC on a list which it
cannot prove is short enough, we should really use
write_barrier_from_array instead of the default write_barrier.
* get rid of GcRefList, one of the last remaining causes of
leaks, as far as I can tell.
diff --git a/pypy/jit/backend/llsupport/gc.py b/pypy/jit/backend/llsupport/gc.py
--- a/pypy/jit/backend/llsupport/gc.py
+++ b/pypy/jit/backend/llsupport/gc.py
@@ -34,7 +34,7 @@
pass
def do_write_barrier(self, gcref_struct, gcref_newptr):
pass
- def rewrite_assembler(self, cpu, operations):
+ def rewrite_assembler(self, cpu, operations, gcrefs_output_list):
return operations
def can_inline_malloc(self, descr):
return False
@@ -146,78 +146,6 @@
# All code below is for the hybrid or minimark GC
-class GcRefList:
- """Handles all references from the generated assembler to GC objects.
- This is implemented as a nonmovable, but GC, list; the assembler contains
- code that will (for now) always read from this list."""
-
- GCREF_LIST = lltype.GcArray(llmemory.GCREF) # followed by the GC
-
- HASHTABLE = rffi.CArray(llmemory.Address) # ignored by the GC
- HASHTABLE_BITS = 10
- HASHTABLE_SIZE = 1 << HASHTABLE_BITS
-
- def initialize(self):
- if we_are_translated(): n = 2000
- else: n = 10 # tests only
- self.list = self.alloc_gcref_list(n)
- self.nextindex = 0
- self.oldlists = []
- # A pseudo dictionary: it is fixed size, and it may contain
- # random nonsense after a collection moved the objects. It is only
- # used to avoid too many duplications in the GCREF_LISTs.
- self.hashtable = lltype.malloc(self.HASHTABLE,
- self.HASHTABLE_SIZE+1,
- flavor='raw', track_allocation=False)
- dummy = lltype.direct_ptradd(lltype.direct_arrayitems(self.hashtable),
- self.HASHTABLE_SIZE)
- dummy = llmemory.cast_ptr_to_adr(dummy)
- for i in range(self.HASHTABLE_SIZE+1):
- self.hashtable[i] = dummy
-
- def alloc_gcref_list(self, n):
- # Important: the GRREF_LISTs allocated are *non-movable*. This
- # requires support in the gc (hybrid GC or minimark GC so far).
- if we_are_translated():
- list = rgc.malloc_nonmovable(self.GCREF_LIST, n)
- assert list, "malloc_nonmovable failed!"
- else:
- list = lltype.malloc(self.GCREF_LIST, n) # for tests only
- return list
-
- def get_address_of_gcref(self, gcref):
- assert lltype.typeOf(gcref) == llmemory.GCREF
- # first look in the hashtable, using an inexact hash (fails after
- # the object moves)
- addr = llmemory.cast_ptr_to_adr(gcref)
- hash = llmemory.cast_adr_to_int(addr, "forced")
- hash -= hash >> self.HASHTABLE_BITS
- hash &= self.HASHTABLE_SIZE - 1
- addr_ref = self.hashtable[hash]
- # the following test is safe anyway, because the addresses found
- # in the hashtable are always the addresses of nonmovable stuff
- # ('addr_ref' is an address inside self.list, not directly the
- # address of a real moving GC object -- that's 'addr_ref.address[0]'.)
- if addr_ref.address[0] == addr:
- return addr_ref
- # if it fails, add an entry to the list
- if self.nextindex == len(self.list):
- # reallocate first, increasing a bit the size every time
- self.oldlists.append(self.list)
- self.list = self.alloc_gcref_list(len(self.list) // 4 * 5)
- self.nextindex = 0
- # add it
- index = self.nextindex
- self.list[index] = gcref
- addr_ref = lltype.direct_ptradd(lltype.direct_arrayitems(self.list),
- index)
- addr_ref = llmemory.cast_ptr_to_adr(addr_ref)
- self.nextindex = index + 1
- # record it in the hashtable
- self.hashtable[hash] = addr_ref
- return addr_ref
-
-
class GcRootMap_asmgcc(object):
"""Handles locating the stack roots in the assembler.
This is the class supporting --gcrootfinder=asmgcc.
@@ -527,6 +455,7 @@
def __init__(self, gc_ll_descr):
self.llop1 = gc_ll_descr.llop1
self.WB_FUNCPTR = gc_ll_descr.WB_FUNCPTR
+ self.WB_ARRAY_FUNCPTR = gc_ll_descr.WB_ARRAY_FUNCPTR
self.fielddescr_tid = get_field_descr(gc_ll_descr,
gc_ll_descr.GCClass.HDR, 'tid')
self.jit_wb_if_flag = gc_ll_descr.GCClass.JIT_WB_IF_FLAG
@@ -546,6 +475,13 @@
funcaddr = llmemory.cast_ptr_to_adr(funcptr)
return cpu.cast_adr_to_int(funcaddr)
+ def get_write_barrier_from_array_fn(self, cpu):
+ llop1 = self.llop1
+ funcptr = llop1.get_write_barrier_from_array_failing_case(
+ self.WB_ARRAY_FUNCPTR)
+ funcaddr = llmemory.cast_ptr_to_adr(funcptr)
+ return cpu.cast_adr_to_int(funcaddr) # this may return 0
+
class GcLLDescr_framework(GcLLDescription):
DEBUG = False # forced to True by x86/test/test_zrpy_gc.py
@@ -559,7 +495,7 @@
self.translator = translator
self.llop1 = llop1
- # we need the hybrid or minimark GC for GcRefList.alloc_gcref_list()
+ # we need the hybrid or minimark GC for rgc._make_sure_does_not_move()
# to work
if gcdescr.config.translation.gc not in ('hybrid', 'minimark'):
raise NotImplementedError("--gc=%s not implemented with the JIT" %
@@ -574,8 +510,6 @@
" with the JIT" % (name,))
gcrootmap = cls(gcdescr)
self.gcrootmap = gcrootmap
- self.gcrefs = GcRefList()
- self.single_gcref_descr = GcPtrFieldDescr('', 0)
# make a TransformerLayoutBuilder and save it on the translator
# where it can be fished and reused by the FrameworkGCTransformer
@@ -617,6 +551,8 @@
[lltype.Signed, lltype.Signed], llmemory.GCREF))
self.WB_FUNCPTR = lltype.Ptr(lltype.FuncType(
[llmemory.Address, llmemory.Address], lltype.Void))
+ self.WB_ARRAY_FUNCPTR = lltype.Ptr(lltype.FuncType(
+ [llmemory.Address, lltype.Signed], lltype.Void))
self.write_barrier_descr = WriteBarrierDescr(self)
#
def malloc_array(itemsize, tid, num_elem):
@@ -706,7 +642,6 @@
return rffi.cast(lltype.Signed, fptr)
def initialize(self):
- self.gcrefs.initialize()
self.gcrootmap.initialize()
def init_size_descr(self, S, descr):
@@ -768,54 +703,32 @@
funcptr(llmemory.cast_ptr_to_adr(gcref_struct),
llmemory.cast_ptr_to_adr(gcref_newptr))
- def replace_constptrs_with_getfield_raw(self, cpu, newops, op):
- # xxx some performance issue here
- newargs = [None] * op.numargs()
- needs_copy = False
+ def record_constptrs(self, op, gcrefs_output_list):
for i in range(op.numargs()):
v = op.getarg(i)
- newargs[i] = v
if isinstance(v, ConstPtr) and bool(v.value):
- addr = self.gcrefs.get_address_of_gcref(v.value)
- # ^^^even for non-movable objects, to record their presence
- if rgc.can_move(v.value):
- box = BoxPtr(v.value)
- addr = cpu.cast_adr_to_int(addr)
- newops.append(ResOperation(rop.GETFIELD_RAW,
- [ConstInt(addr)], box,
- self.single_gcref_descr))
- newargs[i] = box
- needs_copy = True
- #
- if needs_copy:
- return op.copy_and_change(op.getopnum(), args=newargs)
- else:
- return op
+ p = v.value
+ rgc._make_sure_does_not_move(p)
+ gcrefs_output_list.append(p)
-
- def rewrite_assembler(self, cpu, operations):
+ def rewrite_assembler(self, cpu, operations, gcrefs_output_list):
# Perform two kinds of rewrites in parallel:
#
# - Add COND_CALLs to the write barrier before SETFIELD_GC and
# SETARRAYITEM_GC operations.
#
- # - Remove all uses of ConstPtrs away from the assembler.
- # Idea: when running on a moving GC, we can't (easily) encode
- # the ConstPtrs in the assembler, because they can move at any
- # point in time. Instead, we store them in 'gcrefs.list', a GC
- # but nonmovable list; and here, we modify 'operations' to
- # replace direct usage of ConstPtr with a BoxPtr loaded by a
- # GETFIELD_RAW from the array 'gcrefs.list'.
+ # - Record the ConstPtrs from the assembler.
#
newops = []
+ known_lengths = {}
# we can only remember one malloc since the next malloc can possibly
# collect
last_malloc = None
for op in operations:
if op.getopnum() == rop.DEBUG_MERGE_POINT:
continue
- # ---------- replace ConstPtrs with GETFIELD_RAW ----------
- op = self.replace_constptrs_with_getfield_raw(cpu, newops, op)
+ # ---------- record the ConstPtrs ----------
+ self.record_constptrs(op, gcrefs_output_list)
if op.is_malloc():
last_malloc = op.result
elif op.can_malloc():
@@ -838,19 +751,40 @@
v = op.getarg(2)
if isinstance(v, BoxPtr) or (isinstance(v, ConstPtr) and
bool(v.value)): # store a non-NULL
- # XXX detect when we should produce a
- # write_barrier_from_array
- self._gen_write_barrier(newops, op.getarg(0), v)
+ self._gen_write_barrier_array(newops, op.getarg(0),
+ op.getarg(1), v,
+ cpu, known_lengths)
op = op.copy_and_change(rop.SETARRAYITEM_RAW)
+ elif op.getopnum() == rop.NEW_ARRAY:
+ v_length = op.getarg(0)
+ if isinstance(v_length, ConstInt):
+ known_lengths[op.result] = v_length.getint()
# ----------
newops.append(op)
return newops
- def _gen_write_barrier(self, newops, v_base, v_value):
- args = [v_base, v_value]
+ def _gen_write_barrier(self, newops, v_base, v_value_or_index):
+ # NB. the 2nd argument of COND_CALL_GC_WB is either a pointer
+ # (regular case), or an index (case of write_barrier_from_array)
+ args = [v_base, v_value_or_index]
newops.append(ResOperation(rop.COND_CALL_GC_WB, args, None,
descr=self.write_barrier_descr))
+ def _gen_write_barrier_array(self, newops, v_base, v_index, v_value,
+ cpu, known_lengths):
+ if self.write_barrier_descr.get_write_barrier_from_array_fn(cpu) != 0:
+ # If we know statically the length of 'v_base', and it is not too
+ # big, then produce a regular write_barrier. If it's unknown or
+ # too big, produce instead a write_barrier_from_array.
+ LARGE = 130
+ length = known_lengths.get(v_base, LARGE)
+ if length >= LARGE:
+ # unknown or too big: produce a write_barrier_from_array
+ self._gen_write_barrier(newops, v_base, v_index)
+ return
+ # fall-back case: produce a write_barrier
+ self._gen_write_barrier(newops, v_base, v_value)
+
def can_inline_malloc(self, descr):
assert isinstance(descr, BaseSizeDescr)
if descr.size < self.max_size_of_young_obj:
diff --git a/pypy/jit/backend/llsupport/test/test_gc.py
b/pypy/jit/backend/llsupport/test/test_gc.py
--- a/pypy/jit/backend/llsupport/test/test_gc.py
+++ b/pypy/jit/backend/llsupport/test/test_gc.py
@@ -49,19 +49,6 @@
# ____________________________________________________________
-def test_GcRefList():
- S = lltype.GcStruct('S')
- order = range(50) * 4
- random.shuffle(order)
- allocs = [lltype.cast_opaque_ptr(llmemory.GCREF, lltype.malloc(S))
- for i in range(50)]
- allocs = [allocs[i] for i in order]
- #
- gcrefs = GcRefList()
- gcrefs.initialize()
- addrs = [gcrefs.get_address_of_gcref(ptr) for ptr in allocs]
- for i in range(len(allocs)):
- assert addrs[i].address[0] == llmemory.cast_ptr_to_adr(allocs[i])
class TestGcRootMapAsmGcc:
@@ -288,6 +275,18 @@
def get_write_barrier_failing_case(self, FPTRTYPE):
return llhelper(FPTRTYPE, self._write_barrier_failing_case)
+ _have_wb_from_array = False
+
+ def _write_barrier_from_array_failing_case(self, adr_struct, v_index):
+ self.record.append(('barrier_from_array', adr_struct, v_index))
+
+ def get_write_barrier_from_array_failing_case(self, FPTRTYPE):
+ if self._have_wb_from_array:
+ return llhelper(FPTRTYPE,
+ self._write_barrier_from_array_failing_case)
+ else:
+ return lltype.nullptr(FPTRTYPE.TO)
+
class TestFramework(object):
gc = 'hybrid'
@@ -303,9 +302,20 @@
config = config_
class FakeCPU(object):
def cast_adr_to_int(self, adr):
- ptr = llmemory.cast_adr_to_ptr(adr, gc_ll_descr.WB_FUNCPTR)
- assert ptr._obj._callable == llop1._write_barrier_failing_case
- return 42
+ if not adr:
+ return 0
+ try:
+ ptr = llmemory.cast_adr_to_ptr(adr, gc_ll_descr.WB_FUNCPTR)
+ assert ptr._obj._callable == \
+ llop1._write_barrier_failing_case
+ return 42
+ except lltype.InvalidCast:
+ ptr = llmemory.cast_adr_to_ptr(
+ adr, gc_ll_descr.WB_ARRAY_FUNCPTR)
+ assert ptr._obj._callable == \
+ llop1._write_barrier_from_array_failing_case
+ return 43
+
gcdescr = get_description(config_)
translator = FakeTranslator()
llop1 = FakeLLOp()
@@ -414,11 +424,11 @@
ResOperation(rop.DEBUG_MERGE_POINT, ['dummy', 2], None),
]
gc_ll_descr = self.gc_ll_descr
- operations = gc_ll_descr.rewrite_assembler(None, operations)
+ operations = gc_ll_descr.rewrite_assembler(None, operations, [])
assert len(operations) == 0
def test_rewrite_assembler_1(self):
- # check rewriting of ConstPtrs
+ # check recording of ConstPtrs
class MyFakeCPU(object):
def cast_adr_to_int(self, adr):
assert adr == "some fake address"
@@ -438,56 +448,12 @@
]
gc_ll_descr = self.gc_ll_descr
gc_ll_descr.gcrefs = MyFakeGCRefList()
+ gcrefs = []
operations = get_deep_immutable_oplist(operations)
- operations = gc_ll_descr.rewrite_assembler(MyFakeCPU(), operations)
- assert len(operations) == 2
- assert operations[0].getopnum() == rop.GETFIELD_RAW
- assert operations[0].getarg(0) == ConstInt(43)
- assert operations[0].getdescr() == gc_ll_descr.single_gcref_descr
- v_box = operations[0].result
- assert isinstance(v_box, BoxPtr)
- assert operations[1].getopnum() == rop.PTR_EQ
- assert operations[1].getarg(0) == v_random_box
- assert operations[1].getarg(1) == v_box
- assert operations[1].result == v_result
-
- def test_rewrite_assembler_1_cannot_move(self):
- # check rewriting of ConstPtrs
- class MyFakeCPU(object):
- def cast_adr_to_int(self, adr):
- xxx # should not be called
- class MyFakeGCRefList(object):
- def get_address_of_gcref(self, s_gcref1):
- seen.append(s_gcref1)
- assert s_gcref1 == s_gcref
- return "some fake address"
- seen = []
- S = lltype.GcStruct('S')
- s = lltype.malloc(S)
- s_gcref = lltype.cast_opaque_ptr(llmemory.GCREF, s)
- v_random_box = BoxPtr()
- v_result = BoxInt()
- operations = [
- ResOperation(rop.PTR_EQ, [v_random_box, ConstPtr(s_gcref)],
- v_result),
- ]
- gc_ll_descr = self.gc_ll_descr
- gc_ll_descr.gcrefs = MyFakeGCRefList()
- old_can_move = rgc.can_move
- operations = get_deep_immutable_oplist(operations)
- try:
- rgc.can_move = lambda s: False
- operations = gc_ll_descr.rewrite_assembler(MyFakeCPU(), operations)
- finally:
- rgc.can_move = old_can_move
- assert len(operations) == 1
- assert operations[0].getopnum() == rop.PTR_EQ
- assert operations[0].getarg(0) == v_random_box
- assert operations[0].getarg(1) == ConstPtr(s_gcref)
- assert operations[0].result == v_result
- # check that s_gcref gets added to the list anyway, to make sure
- # that the GC sees it
- assert seen == [s_gcref]
+ operations2 = gc_ll_descr.rewrite_assembler(MyFakeCPU(), operations,
+ gcrefs)
+ assert operations2 == operations
+ assert gcrefs == [s_gcref]
def test_rewrite_assembler_2(self):
# check write barriers before SETFIELD_GC
@@ -500,7 +466,8 @@
]
gc_ll_descr = self.gc_ll_descr
operations = get_deep_immutable_oplist(operations)
- operations = gc_ll_descr.rewrite_assembler(self.fake_cpu, operations)
+ operations = gc_ll_descr.rewrite_assembler(self.fake_cpu, operations,
+ [])
assert len(operations) == 2
#
assert operations[0].getopnum() == rop.COND_CALL_GC_WB
@@ -515,29 +482,90 @@
def test_rewrite_assembler_3(self):
# check write barriers before SETARRAYITEM_GC
- v_base = BoxPtr()
- v_index = BoxInt()
- v_value = BoxPtr()
- array_descr = AbstractDescr()
- operations = [
- ResOperation(rop.SETARRAYITEM_GC, [v_base, v_index, v_value], None,
- descr=array_descr),
- ]
- gc_ll_descr = self.gc_ll_descr
- operations = get_deep_immutable_oplist(operations)
- operations = gc_ll_descr.rewrite_assembler(self.fake_cpu, operations)
- assert len(operations) == 2
- #
- assert operations[0].getopnum() == rop.COND_CALL_GC_WB
- assert operations[0].getarg(0) == v_base
- assert operations[0].getarg(1) == v_value
- assert operations[0].result is None
- #
- assert operations[1].getopnum() == rop.SETARRAYITEM_RAW
- assert operations[1].getarg(0) == v_base
- assert operations[1].getarg(1) == v_index
- assert operations[1].getarg(2) == v_value
- assert operations[1].getdescr() == array_descr
+ for v_new_length in (None, ConstInt(5), ConstInt(5000), BoxInt()):
+ v_base = BoxPtr()
+ v_index = BoxInt()
+ v_value = BoxPtr()
+ array_descr = AbstractDescr()
+ operations = [
+ ResOperation(rop.SETARRAYITEM_GC, [v_base, v_index, v_value],
+ None, descr=array_descr),
+ ]
+ if v_new_length is not None:
+ operations.insert(0, ResOperation(rop.NEW_ARRAY,
+ [v_new_length], v_base,
+ descr=array_descr))
+ # we need to insert another, unrelated NEW_ARRAY here
+ # to prevent the initialization_store optimization
+ operations.insert(1, ResOperation(rop.NEW_ARRAY,
+ [ConstInt(12)], BoxPtr(),
+ descr=array_descr))
+ gc_ll_descr = self.gc_ll_descr
+ operations = get_deep_immutable_oplist(operations)
+ operations = gc_ll_descr.rewrite_assembler(self.fake_cpu,
+ operations, [])
+ if v_new_length is not None:
+ assert operations[0].getopnum() == rop.NEW_ARRAY
+ assert operations[1].getopnum() == rop.NEW_ARRAY
+ del operations[:2]
+ assert len(operations) == 2
+ #
+ assert operations[0].getopnum() == rop.COND_CALL_GC_WB
+ assert operations[0].getarg(0) == v_base
+ assert operations[0].getarg(1) == v_value
+ assert operations[0].result is None
+ #
+ assert operations[1].getopnum() == rop.SETARRAYITEM_RAW
+ assert operations[1].getarg(0) == v_base
+ assert operations[1].getarg(1) == v_index
+ assert operations[1].getarg(2) == v_value
+ assert operations[1].getdescr() == array_descr
+
+ def test_rewrite_assembler_4(self):
+ # check write barriers before SETARRAYITEM_GC,
+ # if we have actually a write_barrier_from_array.
+ self.llop1._have_wb_from_array = True
+ for v_new_length in (None, ConstInt(5), ConstInt(5000), BoxInt()):
+ v_base = BoxPtr()
+ v_index = BoxInt()
+ v_value = BoxPtr()
+ array_descr = AbstractDescr()
+ operations = [
+ ResOperation(rop.SETARRAYITEM_GC, [v_base, v_index, v_value],
+ None, descr=array_descr),
+ ]
+ if v_new_length is not None:
+ operations.insert(0, ResOperation(rop.NEW_ARRAY,
+ [v_new_length], v_base,
+ descr=array_descr))
+ # we need to insert another, unrelated NEW_ARRAY here
+ # to prevent the initialization_store optimization
+ operations.insert(1, ResOperation(rop.NEW_ARRAY,
+ [ConstInt(12)], BoxPtr(),
+ descr=array_descr))
+ gc_ll_descr = self.gc_ll_descr
+ operations = get_deep_immutable_oplist(operations)
+ operations = gc_ll_descr.rewrite_assembler(self.fake_cpu,
+ operations, [])
+ if v_new_length is not None:
+ assert operations[0].getopnum() == rop.NEW_ARRAY
+ assert operations[1].getopnum() == rop.NEW_ARRAY
+ del operations[:2]
+ assert len(operations) == 2
+ #
+ assert operations[0].getopnum() == rop.COND_CALL_GC_WB
+ assert operations[0].getarg(0) == v_base
+ if isinstance(v_new_length, ConstInt) and v_new_length.value < 130:
+ assert operations[0].getarg(1) == v_value
+ else:
+ assert operations[0].getarg(1) == v_index
+ assert operations[0].result is None
+ #
+ assert operations[1].getopnum() == rop.SETARRAYITEM_RAW
+ assert operations[1].getarg(0) == v_base
+ assert operations[1].getarg(1) == v_index
+ assert operations[1].getarg(2) == v_value
+ assert operations[1].getdescr() == array_descr
def test_rewrite_assembler_initialization_store(self):
S = lltype.GcStruct('S', ('parent', OBJECT),
@@ -558,7 +586,8 @@
jump()
""", namespace=locals())
operations = get_deep_immutable_oplist(ops.operations)
- operations = self.gc_ll_descr.rewrite_assembler(self.fake_cpu,
operations)
+ operations = self.gc_ll_descr.rewrite_assembler(self.fake_cpu,
+ operations, [])
equaloplists(operations, expected.operations)
def test_rewrite_assembler_initialization_store_2(self):
@@ -583,7 +612,8 @@
jump()
""", namespace=locals())
operations = get_deep_immutable_oplist(ops.operations)
- operations = self.gc_ll_descr.rewrite_assembler(self.fake_cpu,
operations)
+ operations = self.gc_ll_descr.rewrite_assembler(self.fake_cpu,
+ operations, [])
equaloplists(operations, expected.operations)
def test_rewrite_assembler_initialization_store_3(self):
@@ -602,7 +632,8 @@
jump()
""", namespace=locals())
operations = get_deep_immutable_oplist(ops.operations)
- operations = self.gc_ll_descr.rewrite_assembler(self.fake_cpu,
operations)
+ operations = self.gc_ll_descr.rewrite_assembler(self.fake_cpu,
+ operations, [])
equaloplists(operations, expected.operations)
class TestFrameworkMiniMark(TestFramework):
diff --git a/pypy/jit/backend/test/runner_test.py
b/pypy/jit/backend/test/runner_test.py
--- a/pypy/jit/backend/test/runner_test.py
+++ b/pypy/jit/backend/test/runner_test.py
@@ -1680,7 +1680,7 @@
record = []
#
S = lltype.GcStruct('S', ('tid', lltype.Signed))
- FUNC = self.FuncType([lltype.Ptr(S), lltype.Signed], lltype.Void)
+ FUNC = self.FuncType([lltype.Ptr(S), lltype.Ptr(S)], lltype.Void)
func_ptr = llhelper(lltype.Ptr(FUNC), func_void)
funcbox = self.get_funcbox(self.cpu, func_ptr)
class WriteBarrierDescr(AbstractDescr):
@@ -1699,12 +1699,48 @@
s = lltype.malloc(S)
s.tid = value
sgcref = lltype.cast_opaque_ptr(llmemory.GCREF, s)
+ t = lltype.malloc(S)
+ tgcref = lltype.cast_opaque_ptr(llmemory.GCREF, t)
del record[:]
self.execute_operation(rop.COND_CALL_GC_WB,
- [BoxPtr(sgcref), ConstInt(-2121)],
+ [BoxPtr(sgcref), ConstPtr(tgcref)],
'void', descr=WriteBarrierDescr())
if cond:
- assert record == [(s, -2121)]
+ assert record == [(s, t)]
+ else:
+ assert record == []
+
+ def test_cond_call_gc_wb_array(self):
+ def func_void(a, b):
+ record.append((a, b))
+ record = []
+ #
+ S = lltype.GcStruct('S', ('tid', lltype.Signed))
+ FUNC = self.FuncType([lltype.Ptr(S), lltype.Signed], lltype.Void)
+ func_ptr = llhelper(lltype.Ptr(FUNC), func_void)
+ funcbox = self.get_funcbox(self.cpu, func_ptr)
+ class WriteBarrierDescr(AbstractDescr):
+ jit_wb_if_flag = 4096
+ jit_wb_if_flag_byteofs = struct.pack("i", 4096).index('\x10')
+ jit_wb_if_flag_singlebyte = 0x10
+ def get_write_barrier_from_array_fn(self, cpu):
+ return funcbox.getint()
+ #
+ for cond in [False, True]:
+ value = random.randrange(-sys.maxint, sys.maxint)
+ if cond:
+ value |= 4096
+ else:
+ value &= ~4096
+ s = lltype.malloc(S)
+ s.tid = value
+ sgcref = lltype.cast_opaque_ptr(llmemory.GCREF, s)
+ del record[:]
+ self.execute_operation(rop.COND_CALL_GC_WB,
+ [BoxPtr(sgcref), ConstInt(123)],
+ 'void', descr=WriteBarrierDescr())
+ if cond:
+ assert record == [(s, 123)]
else:
assert record == []
diff --git a/pypy/jit/backend/x86/assembler.py
b/pypy/jit/backend/x86/assembler.py
--- a/pypy/jit/backend/x86/assembler.py
+++ b/pypy/jit/backend/x86/assembler.py
@@ -322,6 +322,7 @@
# for the duration of compiling one loop or a one bridge.
clt = CompiledLoopToken(self.cpu, looptoken.number)
+ clt.allgcrefs = []
looptoken.compiled_loop_token = clt
if not we_are_translated():
# Arguments should be unique
@@ -335,7 +336,8 @@
operations = self._inject_debugging_code(looptoken, operations)
regalloc = RegAlloc(self, self.cpu.translate_support_code)
- arglocs, operations = regalloc.prepare_loop(inputargs, operations,
looptoken)
+ arglocs, operations = regalloc.prepare_loop(inputargs, operations,
+ looptoken, clt.allgcrefs)
looptoken._x86_arglocs = arglocs
bootstrappos = self.mc.get_relative_pos()
@@ -407,7 +409,8 @@
regalloc = RegAlloc(self, self.cpu.translate_support_code)
fail_depths = faildescr._x86_current_depths
operations = regalloc.prepare_bridge(fail_depths, inputargs, arglocs,
- operations)
+ operations,
+ self.current_clt.allgcrefs)
stackadjustpos = self._patchable_stackadjust()
frame_depth, param_depth = self._assemble(regalloc, operations)
@@ -499,9 +502,9 @@
funcname = op.getarg(0)._get_str()
break
else:
- funcname = "<loop %d>" % len(self.loop_run_counters)
- # invent the counter, so we don't get too confused
- return funcname
+ funcname = '?'
+ return "%s (loop counter %d)" % (funcname,
+ len(self.loop_run_counters))
def _register_counter(self):
if self._debug:
@@ -2079,6 +2082,8 @@
# function remember_young_pointer() from the GC. The two arguments
# to the call are in arglocs[:2]. The rest, arglocs[2:], contains
# registers that need to be saved and restored across the call.
+ # If op.getarg(1) is an int, it is an array index and we must instead
+ # call remember_young_pointer_from_array().
descr = op.getdescr()
if we_are_translated():
cls = self.cpu.gc_ll_descr.has_write_barrier_class()
@@ -2110,13 +2115,19 @@
remap_frame_layout(self, arglocs[:2], [edi, esi],
X86_64_SCRATCH_REG)
+ if op.getarg(1).type == INT:
+ func = descr.get_write_barrier_from_array_fn(self.cpu)
+ assert func != 0
+ else:
+ func = descr.get_write_barrier_fn(self.cpu)
+
# misaligned stack in the call, but it's ok because the write barrier
# is not going to call anything more. Also, this assumes that the
# write barrier does not touch the xmm registers. (Slightly delicate
# assumption, given that the write barrier can end up calling the
# platform's malloc() from AddressStack.append(). XXX may need to
# be done properly)
- self.mc.CALL(imm(descr.get_write_barrier_fn(self.cpu)))
+ self.mc.CALL(imm(func))
if IS_X86_32:
self.mc.ADD_ri(esp.value, 2*WORD)
for i in range(2, len(arglocs)):
diff --git a/pypy/jit/backend/x86/regalloc.py b/pypy/jit/backend/x86/regalloc.py
--- a/pypy/jit/backend/x86/regalloc.py
+++ b/pypy/jit/backend/x86/regalloc.py
@@ -157,11 +157,12 @@
# to be read/used by the assembler too
self.jump_target_descr = None
- def _prepare(self, inputargs, operations):
+ def _prepare(self, inputargs, operations, allgcrefs):
self.fm = X86FrameManager()
self.param_depth = 0
cpu = self.assembler.cpu
- operations = cpu.gc_ll_descr.rewrite_assembler(cpu, operations)
+ operations = cpu.gc_ll_descr.rewrite_assembler(cpu, operations,
+ allgcrefs)
# compute longevity of variables
longevity = self._compute_vars_longevity(inputargs, operations)
self.longevity = longevity
@@ -172,15 +173,16 @@
assembler = self.assembler)
return operations
- def prepare_loop(self, inputargs, operations, looptoken):
- operations = self._prepare(inputargs, operations)
+ def prepare_loop(self, inputargs, operations, looptoken, allgcrefs):
+ operations = self._prepare(inputargs, operations, allgcrefs)
jump = operations[-1]
loop_consts = self._compute_loop_consts(inputargs, jump, looptoken)
self.loop_consts = loop_consts
return self._process_inputargs(inputargs), operations
- def prepare_bridge(self, prev_depths, inputargs, arglocs, operations):
- operations = self._prepare(inputargs, operations)
+ def prepare_bridge(self, prev_depths, inputargs, arglocs, operations,
+ allgcrefs):
+ operations = self._prepare(inputargs, operations, allgcrefs)
self.loop_consts = {}
self._update_bindings(arglocs, inputargs)
self.fm.frame_depth = prev_depths[0]
@@ -864,12 +866,12 @@
def consider_cond_call_gc_wb(self, op):
assert op.result is None
args = op.getarglist()
- loc_newvalue = self.rm.make_sure_var_in_reg(op.getarg(1), args)
- # ^^^ we force loc_newvalue in a reg (unless it's a Const),
+ loc_newvalue_or_index= self.rm.make_sure_var_in_reg(op.getarg(1), args)
+ # ^^^ we force loc_newvalue_or_index in a reg (unless it's a Const),
# because it will be needed anyway by the following setfield_gc.
# It avoids loading it twice from the memory.
loc_base = self.rm.make_sure_var_in_reg(op.getarg(0), args)
- arglocs = [loc_base, loc_newvalue]
+ arglocs = [loc_base, loc_newvalue_or_index]
# add eax, ecx and edx as extra "arguments" to ensure they are
# saved and restored. Fish in self.rm to know which of these
# registers really need to be saved (a bit of a hack). Moreover,
diff --git a/pypy/jit/backend/x86/test/test_gc_integration.py
b/pypy/jit/backend/x86/test/test_gc_integration.py
--- a/pypy/jit/backend/x86/test/test_gc_integration.py
+++ b/pypy/jit/backend/x86/test/test_gc_integration.py
@@ -16,7 +16,7 @@
from pypy.rpython.lltypesystem import lltype, llmemory, rffi
from pypy.rpython.annlowlevel import llhelper
from pypy.rpython.lltypesystem import rclass, rstr
-from pypy.jit.backend.llsupport.gc import GcLLDescr_framework, GcRefList,
GcPtrFieldDescr
+from pypy.jit.backend.llsupport.gc import GcLLDescr_framework, GcPtrFieldDescr
from pypy.jit.backend.x86.test.test_regalloc import MockAssembler
from pypy.jit.backend.x86.test.test_regalloc import BaseTestRegalloc
@@ -51,11 +51,9 @@
gcrootmap = MockGcRootMap()
def initialize(self):
- self.gcrefs = GcRefList()
- self.gcrefs.initialize()
- self.single_gcref_descr = GcPtrFieldDescr('', 0)
+ pass
- replace_constptrs_with_getfield_raw =
GcLLDescr_framework.replace_constptrs_with_getfield_raw.im_func
+ record_constptrs = GcLLDescr_framework.record_constptrs.im_func
rewrite_assembler = GcLLDescr_framework.rewrite_assembler.im_func
class TestRegallocDirectGcIntegration(object):
diff --git a/pypy/jit/backend/x86/test/test_runner.py
b/pypy/jit/backend/x86/test/test_runner.py
--- a/pypy/jit/backend/x86/test/test_runner.py
+++ b/pypy/jit/backend/x86/test/test_runner.py
@@ -362,7 +362,7 @@
operations[3].setfailargs([i1])
self.cpu.compile_loop(inputargs, operations, looptoken)
name, loopaddress, loopsize = agent.functions[0]
- assert name == "Loop # 17: hello"
+ assert name == "Loop # 17: hello (loop counter 0)"
assert loopaddress <= looptoken._x86_loop_code
assert loopsize >= 40 # randomish number
@@ -378,7 +378,7 @@
self.cpu.compile_bridge(faildescr1, [i1b], bridge, looptoken)
name, address, size = agent.functions[1]
- assert name == "Bridge # 0: bye"
+ assert name == "Bridge # 0: bye (loop counter 1)"
# Would be exactly ==, but there are some guard failure recovery
# stubs in-between
assert address >= loopaddress + loopsize
diff --git a/pypy/jit/backend/x86/test/test_zrpy_gc.py
b/pypy/jit/backend/x86/test/test_zrpy_gc.py
--- a/pypy/jit/backend/x86/test/test_zrpy_gc.py
+++ b/pypy/jit/backend/x86/test/test_zrpy_gc.py
@@ -1,8 +1,7 @@
"""
-This is a test that translates a complete JIT to C and runs it. It is
-not testing much, expect that it basically works. What it *is* testing,
-however, is the correct handling of GC, i.e. if objects are freed as
-soon as possible (at least in a simple case).
+This is a test that translates a complete JIT together with a GC and runs it.
+It is testing that the GC-dependent aspects basically work, mostly the mallocs
+and the various cases of write barrier.
"""
import weakref
@@ -14,7 +13,7 @@
from pypy.rlib.jit import JitDriver, dont_look_inside
from pypy.rlib.jit import purefunction, unroll_safe
from pypy.jit.backend.x86.runner import CPU386
-from pypy.jit.backend.llsupport.gc import GcRefList, GcRootMap_asmgcc
+from pypy.jit.backend.llsupport.gc import GcRootMap_asmgcc
from pypy.jit.backend.llsupport.gc import GcLLDescr_framework
from pypy.tool.udir import udir
from pypy.jit.backend.x86.arch import IS_X86_64
@@ -456,6 +455,73 @@
def test_compile_framework_7(self):
self.run('compile_framework_7')
+ def define_compile_framework_8(cls):
+ # Array of pointers, of unknown length (test write_barrier_from_array)
+ def before(n, x):
+ return n, x, None, None, None, None, None, None, None, None,
[X(123)], None
+ def f(n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s):
+ if n < 1900:
+ check(l[0].x == 123)
+ l = [None] * (16 + (n & 7))
+ l[0] = X(123)
+ l[1] = X(n)
+ l[2] = X(n+10)
+ l[3] = X(n+20)
+ l[4] = X(n+30)
+ l[5] = X(n+40)
+ l[6] = X(n+50)
+ l[7] = X(n+60)
+ l[8] = X(n+70)
+ l[9] = X(n+80)
+ l[10] = X(n+90)
+ l[11] = X(n+100)
+ l[12] = X(n+110)
+ l[13] = X(n+120)
+ l[14] = X(n+130)
+ l[15] = X(n+140)
+ if n < 1800:
+ check(len(l) == 16 + (n & 7))
+ check(l[0].x == 123)
+ check(l[1].x == n)
+ check(l[2].x == n+10)
+ check(l[3].x == n+20)
+ check(l[4].x == n+30)
+ check(l[5].x == n+40)
+ check(l[6].x == n+50)
+ check(l[7].x == n+60)
+ check(l[8].x == n+70)
+ check(l[9].x == n+80)
+ check(l[10].x == n+90)
+ check(l[11].x == n+100)
+ check(l[12].x == n+110)
+ check(l[13].x == n+120)
+ check(l[14].x == n+130)
+ check(l[15].x == n+140)
+ n -= x.foo
+ return n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s
+ def after(n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s):
+ check(len(l) >= 16)
+ check(l[0].x == 123)
+ check(l[1].x == 2)
+ check(l[2].x == 12)
+ check(l[3].x == 22)
+ check(l[4].x == 32)
+ check(l[5].x == 42)
+ check(l[6].x == 52)
+ check(l[7].x == 62)
+ check(l[8].x == 72)
+ check(l[9].x == 82)
+ check(l[10].x == 92)
+ check(l[11].x == 102)
+ check(l[12].x == 112)
+ check(l[13].x == 122)
+ check(l[14].x == 132)
+ check(l[15].x == 142)
+ return before, f, after
+
+ def test_compile_framework_8(self):
+ self.run('compile_framework_8')
+
def define_compile_framework_external_exception_handling(cls):
def before(n, x):
x = X(0)
diff --git a/pypy/jit/metainterp/resoperation.py
b/pypy/jit/metainterp/resoperation.py
--- a/pypy/jit/metainterp/resoperation.py
+++ b/pypy/jit/metainterp/resoperation.py
@@ -471,7 +471,8 @@
'STRSETITEM/3',
'UNICODESETITEM/3',
#'RUNTIMENEW/1', # ootype operation
- 'COND_CALL_GC_WB/2d', # [objptr, newvalue] (for the write barrier)
+ 'COND_CALL_GC_WB/2d', # [objptr, newvalue] or [arrayptr, index]
+ # (for the write barrier; the latter form is write_barrier_from_array)
'DEBUG_MERGE_POINT/2', # debugging only
'JIT_DEBUG/*', # debugging only
'VIRTUAL_REF_FINISH/2', # removed before it's passed to the backend
diff --git a/pypy/rlib/rgc.py b/pypy/rlib/rgc.py
--- a/pypy/rlib/rgc.py
+++ b/pypy/rlib/rgc.py
@@ -191,6 +191,21 @@
hop.exception_cannot_occur()
return hop.genop('gc_can_move', hop.args_v, resulttype=hop.r_result)
+def _make_sure_does_not_move(p):
+ """'p' is a non-null GC object. This (tries to) make sure that the
+ object does not move any more, by forcing collections if needed.
+ Warning: should ideally only be used with the minimark GC, and only
+ on objects that are already a bit old, so have a chance to be
+ already non-movable."""
+ if not we_are_translated():
+ return
+ i = 0
+ while can_move(p):
+ if i > 6:
+ raise NotImplementedError("can't make object non-movable!")
+ collect(i)
+ i += 1
+
def _heap_stats():
raise NotImplementedError # can't be run directly
diff --git a/pypy/rpython/memory/gc/minimark.py
b/pypy/rpython/memory/gc/minimark.py
--- a/pypy/rpython/memory/gc/minimark.py
+++ b/pypy/rpython/memory/gc/minimark.py
@@ -1020,6 +1020,7 @@
objhdr.tid |= GCFLAG_CARDS_SET
remember_young_pointer_from_array._dont_inline_ = True
+ assert self.card_page_indices > 0
self.remember_young_pointer_from_array = (
remember_young_pointer_from_array)
diff --git a/pypy/rpython/memory/gctransform/framework.py
b/pypy/rpython/memory/gctransform/framework.py
--- a/pypy/rpython/memory/gctransform/framework.py
+++ b/pypy/rpython/memory/gctransform/framework.py
@@ -860,9 +860,9 @@
def gct_get_write_barrier_from_array_failing_case(self, hop):
op = hop.spaceop
- hop.genop("same_as",
- [self.write_barrier_from_array_failing_case_ptr],
- resultvar=op.result)
+ v = getattr(self, 'write_barrier_from_array_failing_case_ptr',
+ lltype.nullptr(op.result.concretetype.TO))
+ hop.genop("same_as", [v], resultvar=op.result)
def gct_zero_gc_pointers_inside(self, hop):
if not self.malloc_zero_filled:
_______________________________________________
pypy-commit mailing list
[email protected]
http://mail.python.org/mailman/listinfo/pypy-commit