Author: Armin Rigo <ar...@tunes.org> Branch: Changeset: r44685:5a240a694bb8 Date: 2011-06-04 07:24 +0200 http://bitbucket.org/pypy/pypy/changeset/5a240a694bb8/
Log: Merge jit-write-barrier-from-array: * in case the JIT generates a SETARRAYITEM_GC on a list which it cannot prove is short enough, we should really use write_barrier_from_array instead of the default write_barrier. * get rid of GcRefList, one of the last remaining causes of leaks, as far as I can tell. diff --git a/pypy/jit/backend/llsupport/gc.py b/pypy/jit/backend/llsupport/gc.py --- a/pypy/jit/backend/llsupport/gc.py +++ b/pypy/jit/backend/llsupport/gc.py @@ -34,7 +34,7 @@ pass def do_write_barrier(self, gcref_struct, gcref_newptr): pass - def rewrite_assembler(self, cpu, operations): + def rewrite_assembler(self, cpu, operations, gcrefs_output_list): return operations def can_inline_malloc(self, descr): return False @@ -146,78 +146,6 @@ # All code below is for the hybrid or minimark GC -class GcRefList: - """Handles all references from the generated assembler to GC objects. - This is implemented as a nonmovable, but GC, list; the assembler contains - code that will (for now) always read from this list.""" - - GCREF_LIST = lltype.GcArray(llmemory.GCREF) # followed by the GC - - HASHTABLE = rffi.CArray(llmemory.Address) # ignored by the GC - HASHTABLE_BITS = 10 - HASHTABLE_SIZE = 1 << HASHTABLE_BITS - - def initialize(self): - if we_are_translated(): n = 2000 - else: n = 10 # tests only - self.list = self.alloc_gcref_list(n) - self.nextindex = 0 - self.oldlists = [] - # A pseudo dictionary: it is fixed size, and it may contain - # random nonsense after a collection moved the objects. It is only - # used to avoid too many duplications in the GCREF_LISTs. - self.hashtable = lltype.malloc(self.HASHTABLE, - self.HASHTABLE_SIZE+1, - flavor='raw', track_allocation=False) - dummy = lltype.direct_ptradd(lltype.direct_arrayitems(self.hashtable), - self.HASHTABLE_SIZE) - dummy = llmemory.cast_ptr_to_adr(dummy) - for i in range(self.HASHTABLE_SIZE+1): - self.hashtable[i] = dummy - - def alloc_gcref_list(self, n): - # Important: the GRREF_LISTs allocated are *non-movable*. This - # requires support in the gc (hybrid GC or minimark GC so far). - if we_are_translated(): - list = rgc.malloc_nonmovable(self.GCREF_LIST, n) - assert list, "malloc_nonmovable failed!" - else: - list = lltype.malloc(self.GCREF_LIST, n) # for tests only - return list - - def get_address_of_gcref(self, gcref): - assert lltype.typeOf(gcref) == llmemory.GCREF - # first look in the hashtable, using an inexact hash (fails after - # the object moves) - addr = llmemory.cast_ptr_to_adr(gcref) - hash = llmemory.cast_adr_to_int(addr, "forced") - hash -= hash >> self.HASHTABLE_BITS - hash &= self.HASHTABLE_SIZE - 1 - addr_ref = self.hashtable[hash] - # the following test is safe anyway, because the addresses found - # in the hashtable are always the addresses of nonmovable stuff - # ('addr_ref' is an address inside self.list, not directly the - # address of a real moving GC object -- that's 'addr_ref.address[0]'.) - if addr_ref.address[0] == addr: - return addr_ref - # if it fails, add an entry to the list - if self.nextindex == len(self.list): - # reallocate first, increasing a bit the size every time - self.oldlists.append(self.list) - self.list = self.alloc_gcref_list(len(self.list) // 4 * 5) - self.nextindex = 0 - # add it - index = self.nextindex - self.list[index] = gcref - addr_ref = lltype.direct_ptradd(lltype.direct_arrayitems(self.list), - index) - addr_ref = llmemory.cast_ptr_to_adr(addr_ref) - self.nextindex = index + 1 - # record it in the hashtable - self.hashtable[hash] = addr_ref - return addr_ref - - class GcRootMap_asmgcc(object): """Handles locating the stack roots in the assembler. This is the class supporting --gcrootfinder=asmgcc. @@ -527,6 +455,7 @@ def __init__(self, gc_ll_descr): self.llop1 = gc_ll_descr.llop1 self.WB_FUNCPTR = gc_ll_descr.WB_FUNCPTR + self.WB_ARRAY_FUNCPTR = gc_ll_descr.WB_ARRAY_FUNCPTR self.fielddescr_tid = get_field_descr(gc_ll_descr, gc_ll_descr.GCClass.HDR, 'tid') self.jit_wb_if_flag = gc_ll_descr.GCClass.JIT_WB_IF_FLAG @@ -546,6 +475,13 @@ funcaddr = llmemory.cast_ptr_to_adr(funcptr) return cpu.cast_adr_to_int(funcaddr) + def get_write_barrier_from_array_fn(self, cpu): + llop1 = self.llop1 + funcptr = llop1.get_write_barrier_from_array_failing_case( + self.WB_ARRAY_FUNCPTR) + funcaddr = llmemory.cast_ptr_to_adr(funcptr) + return cpu.cast_adr_to_int(funcaddr) # this may return 0 + class GcLLDescr_framework(GcLLDescription): DEBUG = False # forced to True by x86/test/test_zrpy_gc.py @@ -559,7 +495,7 @@ self.translator = translator self.llop1 = llop1 - # we need the hybrid or minimark GC for GcRefList.alloc_gcref_list() + # we need the hybrid or minimark GC for rgc._make_sure_does_not_move() # to work if gcdescr.config.translation.gc not in ('hybrid', 'minimark'): raise NotImplementedError("--gc=%s not implemented with the JIT" % @@ -574,8 +510,6 @@ " with the JIT" % (name,)) gcrootmap = cls(gcdescr) self.gcrootmap = gcrootmap - self.gcrefs = GcRefList() - self.single_gcref_descr = GcPtrFieldDescr('', 0) # make a TransformerLayoutBuilder and save it on the translator # where it can be fished and reused by the FrameworkGCTransformer @@ -617,6 +551,8 @@ [lltype.Signed, lltype.Signed], llmemory.GCREF)) self.WB_FUNCPTR = lltype.Ptr(lltype.FuncType( [llmemory.Address, llmemory.Address], lltype.Void)) + self.WB_ARRAY_FUNCPTR = lltype.Ptr(lltype.FuncType( + [llmemory.Address, lltype.Signed], lltype.Void)) self.write_barrier_descr = WriteBarrierDescr(self) # def malloc_array(itemsize, tid, num_elem): @@ -706,7 +642,6 @@ return rffi.cast(lltype.Signed, fptr) def initialize(self): - self.gcrefs.initialize() self.gcrootmap.initialize() def init_size_descr(self, S, descr): @@ -768,54 +703,32 @@ funcptr(llmemory.cast_ptr_to_adr(gcref_struct), llmemory.cast_ptr_to_adr(gcref_newptr)) - def replace_constptrs_with_getfield_raw(self, cpu, newops, op): - # xxx some performance issue here - newargs = [None] * op.numargs() - needs_copy = False + def record_constptrs(self, op, gcrefs_output_list): for i in range(op.numargs()): v = op.getarg(i) - newargs[i] = v if isinstance(v, ConstPtr) and bool(v.value): - addr = self.gcrefs.get_address_of_gcref(v.value) - # ^^^even for non-movable objects, to record their presence - if rgc.can_move(v.value): - box = BoxPtr(v.value) - addr = cpu.cast_adr_to_int(addr) - newops.append(ResOperation(rop.GETFIELD_RAW, - [ConstInt(addr)], box, - self.single_gcref_descr)) - newargs[i] = box - needs_copy = True - # - if needs_copy: - return op.copy_and_change(op.getopnum(), args=newargs) - else: - return op + p = v.value + rgc._make_sure_does_not_move(p) + gcrefs_output_list.append(p) - - def rewrite_assembler(self, cpu, operations): + def rewrite_assembler(self, cpu, operations, gcrefs_output_list): # Perform two kinds of rewrites in parallel: # # - Add COND_CALLs to the write barrier before SETFIELD_GC and # SETARRAYITEM_GC operations. # - # - Remove all uses of ConstPtrs away from the assembler. - # Idea: when running on a moving GC, we can't (easily) encode - # the ConstPtrs in the assembler, because they can move at any - # point in time. Instead, we store them in 'gcrefs.list', a GC - # but nonmovable list; and here, we modify 'operations' to - # replace direct usage of ConstPtr with a BoxPtr loaded by a - # GETFIELD_RAW from the array 'gcrefs.list'. + # - Record the ConstPtrs from the assembler. # newops = [] + known_lengths = {} # we can only remember one malloc since the next malloc can possibly # collect last_malloc = None for op in operations: if op.getopnum() == rop.DEBUG_MERGE_POINT: continue - # ---------- replace ConstPtrs with GETFIELD_RAW ---------- - op = self.replace_constptrs_with_getfield_raw(cpu, newops, op) + # ---------- record the ConstPtrs ---------- + self.record_constptrs(op, gcrefs_output_list) if op.is_malloc(): last_malloc = op.result elif op.can_malloc(): @@ -838,19 +751,40 @@ v = op.getarg(2) if isinstance(v, BoxPtr) or (isinstance(v, ConstPtr) and bool(v.value)): # store a non-NULL - # XXX detect when we should produce a - # write_barrier_from_array - self._gen_write_barrier(newops, op.getarg(0), v) + self._gen_write_barrier_array(newops, op.getarg(0), + op.getarg(1), v, + cpu, known_lengths) op = op.copy_and_change(rop.SETARRAYITEM_RAW) + elif op.getopnum() == rop.NEW_ARRAY: + v_length = op.getarg(0) + if isinstance(v_length, ConstInt): + known_lengths[op.result] = v_length.getint() # ---------- newops.append(op) return newops - def _gen_write_barrier(self, newops, v_base, v_value): - args = [v_base, v_value] + def _gen_write_barrier(self, newops, v_base, v_value_or_index): + # NB. the 2nd argument of COND_CALL_GC_WB is either a pointer + # (regular case), or an index (case of write_barrier_from_array) + args = [v_base, v_value_or_index] newops.append(ResOperation(rop.COND_CALL_GC_WB, args, None, descr=self.write_barrier_descr)) + def _gen_write_barrier_array(self, newops, v_base, v_index, v_value, + cpu, known_lengths): + if self.write_barrier_descr.get_write_barrier_from_array_fn(cpu) != 0: + # If we know statically the length of 'v', and it is not too + # big, then produce a regular write_barrier. If it's unknown or + # too big, produce instead a write_barrier_from_array. + LARGE = 130 + length = known_lengths.get(v_base, LARGE) + if length >= LARGE: + # unknown or too big: produce a write_barrier_from_array + self._gen_write_barrier(newops, v_base, v_index) + return + # fall-back case: produce a write_barrier + self._gen_write_barrier(newops, v_base, v_value) + def can_inline_malloc(self, descr): assert isinstance(descr, BaseSizeDescr) if descr.size < self.max_size_of_young_obj: diff --git a/pypy/jit/backend/llsupport/test/test_gc.py b/pypy/jit/backend/llsupport/test/test_gc.py --- a/pypy/jit/backend/llsupport/test/test_gc.py +++ b/pypy/jit/backend/llsupport/test/test_gc.py @@ -49,19 +49,6 @@ # ____________________________________________________________ -def test_GcRefList(): - S = lltype.GcStruct('S') - order = range(50) * 4 - random.shuffle(order) - allocs = [lltype.cast_opaque_ptr(llmemory.GCREF, lltype.malloc(S)) - for i in range(50)] - allocs = [allocs[i] for i in order] - # - gcrefs = GcRefList() - gcrefs.initialize() - addrs = [gcrefs.get_address_of_gcref(ptr) for ptr in allocs] - for i in range(len(allocs)): - assert addrs[i].address[0] == llmemory.cast_ptr_to_adr(allocs[i]) class TestGcRootMapAsmGcc: @@ -288,6 +275,18 @@ def get_write_barrier_failing_case(self, FPTRTYPE): return llhelper(FPTRTYPE, self._write_barrier_failing_case) + _have_wb_from_array = False + + def _write_barrier_from_array_failing_case(self, adr_struct, v_index): + self.record.append(('barrier_from_array', adr_struct, v_index)) + + def get_write_barrier_from_array_failing_case(self, FPTRTYPE): + if self._have_wb_from_array: + return llhelper(FPTRTYPE, + self._write_barrier_from_array_failing_case) + else: + return lltype.nullptr(FPTRTYPE.TO) + class TestFramework(object): gc = 'hybrid' @@ -303,9 +302,20 @@ config = config_ class FakeCPU(object): def cast_adr_to_int(self, adr): - ptr = llmemory.cast_adr_to_ptr(adr, gc_ll_descr.WB_FUNCPTR) - assert ptr._obj._callable == llop1._write_barrier_failing_case - return 42 + if not adr: + return 0 + try: + ptr = llmemory.cast_adr_to_ptr(adr, gc_ll_descr.WB_FUNCPTR) + assert ptr._obj._callable == \ + llop1._write_barrier_failing_case + return 42 + except lltype.InvalidCast: + ptr = llmemory.cast_adr_to_ptr( + adr, gc_ll_descr.WB_ARRAY_FUNCPTR) + assert ptr._obj._callable == \ + llop1._write_barrier_from_array_failing_case + return 43 + gcdescr = get_description(config_) translator = FakeTranslator() llop1 = FakeLLOp() @@ -414,11 +424,11 @@ ResOperation(rop.DEBUG_MERGE_POINT, ['dummy', 2], None), ] gc_ll_descr = self.gc_ll_descr - operations = gc_ll_descr.rewrite_assembler(None, operations) + operations = gc_ll_descr.rewrite_assembler(None, operations, []) assert len(operations) == 0 def test_rewrite_assembler_1(self): - # check rewriting of ConstPtrs + # check recording of ConstPtrs class MyFakeCPU(object): def cast_adr_to_int(self, adr): assert adr == "some fake address" @@ -438,56 +448,12 @@ ] gc_ll_descr = self.gc_ll_descr gc_ll_descr.gcrefs = MyFakeGCRefList() + gcrefs = [] operations = get_deep_immutable_oplist(operations) - operations = gc_ll_descr.rewrite_assembler(MyFakeCPU(), operations) - assert len(operations) == 2 - assert operations[0].getopnum() == rop.GETFIELD_RAW - assert operations[0].getarg(0) == ConstInt(43) - assert operations[0].getdescr() == gc_ll_descr.single_gcref_descr - v_box = operations[0].result - assert isinstance(v_box, BoxPtr) - assert operations[1].getopnum() == rop.PTR_EQ - assert operations[1].getarg(0) == v_random_box - assert operations[1].getarg(1) == v_box - assert operations[1].result == v_result - - def test_rewrite_assembler_1_cannot_move(self): - # check rewriting of ConstPtrs - class MyFakeCPU(object): - def cast_adr_to_int(self, adr): - xxx # should not be called - class MyFakeGCRefList(object): - def get_address_of_gcref(self, s_gcref1): - seen.append(s_gcref1) - assert s_gcref1 == s_gcref - return "some fake address" - seen = [] - S = lltype.GcStruct('S') - s = lltype.malloc(S) - s_gcref = lltype.cast_opaque_ptr(llmemory.GCREF, s) - v_random_box = BoxPtr() - v_result = BoxInt() - operations = [ - ResOperation(rop.PTR_EQ, [v_random_box, ConstPtr(s_gcref)], - v_result), - ] - gc_ll_descr = self.gc_ll_descr - gc_ll_descr.gcrefs = MyFakeGCRefList() - old_can_move = rgc.can_move - operations = get_deep_immutable_oplist(operations) - try: - rgc.can_move = lambda s: False - operations = gc_ll_descr.rewrite_assembler(MyFakeCPU(), operations) - finally: - rgc.can_move = old_can_move - assert len(operations) == 1 - assert operations[0].getopnum() == rop.PTR_EQ - assert operations[0].getarg(0) == v_random_box - assert operations[0].getarg(1) == ConstPtr(s_gcref) - assert operations[0].result == v_result - # check that s_gcref gets added to the list anyway, to make sure - # that the GC sees it - assert seen == [s_gcref] + operations2 = gc_ll_descr.rewrite_assembler(MyFakeCPU(), operations, + gcrefs) + assert operations2 == operations + assert gcrefs == [s_gcref] def test_rewrite_assembler_2(self): # check write barriers before SETFIELD_GC @@ -500,7 +466,8 @@ ] gc_ll_descr = self.gc_ll_descr operations = get_deep_immutable_oplist(operations) - operations = gc_ll_descr.rewrite_assembler(self.fake_cpu, operations) + operations = gc_ll_descr.rewrite_assembler(self.fake_cpu, operations, + []) assert len(operations) == 2 # assert operations[0].getopnum() == rop.COND_CALL_GC_WB @@ -515,29 +482,90 @@ def test_rewrite_assembler_3(self): # check write barriers before SETARRAYITEM_GC - v_base = BoxPtr() - v_index = BoxInt() - v_value = BoxPtr() - array_descr = AbstractDescr() - operations = [ - ResOperation(rop.SETARRAYITEM_GC, [v_base, v_index, v_value], None, - descr=array_descr), - ] - gc_ll_descr = self.gc_ll_descr - operations = get_deep_immutable_oplist(operations) - operations = gc_ll_descr.rewrite_assembler(self.fake_cpu, operations) - assert len(operations) == 2 - # - assert operations[0].getopnum() == rop.COND_CALL_GC_WB - assert operations[0].getarg(0) == v_base - assert operations[0].getarg(1) == v_value - assert operations[0].result is None - # - assert operations[1].getopnum() == rop.SETARRAYITEM_RAW - assert operations[1].getarg(0) == v_base - assert operations[1].getarg(1) == v_index - assert operations[1].getarg(2) == v_value - assert operations[1].getdescr() == array_descr + for v_new_length in (None, ConstInt(5), ConstInt(5000), BoxInt()): + v_base = BoxPtr() + v_index = BoxInt() + v_value = BoxPtr() + array_descr = AbstractDescr() + operations = [ + ResOperation(rop.SETARRAYITEM_GC, [v_base, v_index, v_value], + None, descr=array_descr), + ] + if v_new_length is not None: + operations.insert(0, ResOperation(rop.NEW_ARRAY, + [v_new_length], v_base, + descr=array_descr)) + # we need to insert another, unrelated NEW_ARRAY here + # to prevent the initialization_store optimization + operations.insert(1, ResOperation(rop.NEW_ARRAY, + [ConstInt(12)], BoxPtr(), + descr=array_descr)) + gc_ll_descr = self.gc_ll_descr + operations = get_deep_immutable_oplist(operations) + operations = gc_ll_descr.rewrite_assembler(self.fake_cpu, + operations, []) + if v_new_length is not None: + assert operations[0].getopnum() == rop.NEW_ARRAY + assert operations[1].getopnum() == rop.NEW_ARRAY + del operations[:2] + assert len(operations) == 2 + # + assert operations[0].getopnum() == rop.COND_CALL_GC_WB + assert operations[0].getarg(0) == v_base + assert operations[0].getarg(1) == v_value + assert operations[0].result is None + # + assert operations[1].getopnum() == rop.SETARRAYITEM_RAW + assert operations[1].getarg(0) == v_base + assert operations[1].getarg(1) == v_index + assert operations[1].getarg(2) == v_value + assert operations[1].getdescr() == array_descr + + def test_rewrite_assembler_4(self): + # check write barriers before SETARRAYITEM_GC, + # if we have actually a write_barrier_from_array. + self.llop1._have_wb_from_array = True + for v_new_length in (None, ConstInt(5), ConstInt(5000), BoxInt()): + v_base = BoxPtr() + v_index = BoxInt() + v_value = BoxPtr() + array_descr = AbstractDescr() + operations = [ + ResOperation(rop.SETARRAYITEM_GC, [v_base, v_index, v_value], + None, descr=array_descr), + ] + if v_new_length is not None: + operations.insert(0, ResOperation(rop.NEW_ARRAY, + [v_new_length], v_base, + descr=array_descr)) + # we need to insert another, unrelated NEW_ARRAY here + # to prevent the initialization_store optimization + operations.insert(1, ResOperation(rop.NEW_ARRAY, + [ConstInt(12)], BoxPtr(), + descr=array_descr)) + gc_ll_descr = self.gc_ll_descr + operations = get_deep_immutable_oplist(operations) + operations = gc_ll_descr.rewrite_assembler(self.fake_cpu, + operations, []) + if v_new_length is not None: + assert operations[0].getopnum() == rop.NEW_ARRAY + assert operations[1].getopnum() == rop.NEW_ARRAY + del operations[:2] + assert len(operations) == 2 + # + assert operations[0].getopnum() == rop.COND_CALL_GC_WB + assert operations[0].getarg(0) == v_base + if isinstance(v_new_length, ConstInt) and v_new_length.value < 130: + assert operations[0].getarg(1) == v_value + else: + assert operations[0].getarg(1) == v_index + assert operations[0].result is None + # + assert operations[1].getopnum() == rop.SETARRAYITEM_RAW + assert operations[1].getarg(0) == v_base + assert operations[1].getarg(1) == v_index + assert operations[1].getarg(2) == v_value + assert operations[1].getdescr() == array_descr def test_rewrite_assembler_initialization_store(self): S = lltype.GcStruct('S', ('parent', OBJECT), @@ -558,7 +586,8 @@ jump() """, namespace=locals()) operations = get_deep_immutable_oplist(ops.operations) - operations = self.gc_ll_descr.rewrite_assembler(self.fake_cpu, operations) + operations = self.gc_ll_descr.rewrite_assembler(self.fake_cpu, + operations, []) equaloplists(operations, expected.operations) def test_rewrite_assembler_initialization_store_2(self): @@ -583,7 +612,8 @@ jump() """, namespace=locals()) operations = get_deep_immutable_oplist(ops.operations) - operations = self.gc_ll_descr.rewrite_assembler(self.fake_cpu, operations) + operations = self.gc_ll_descr.rewrite_assembler(self.fake_cpu, + operations, []) equaloplists(operations, expected.operations) def test_rewrite_assembler_initialization_store_3(self): @@ -602,7 +632,8 @@ jump() """, namespace=locals()) operations = get_deep_immutable_oplist(ops.operations) - operations = self.gc_ll_descr.rewrite_assembler(self.fake_cpu, operations) + operations = self.gc_ll_descr.rewrite_assembler(self.fake_cpu, + operations, []) equaloplists(operations, expected.operations) class TestFrameworkMiniMark(TestFramework): diff --git a/pypy/jit/backend/test/runner_test.py b/pypy/jit/backend/test/runner_test.py --- a/pypy/jit/backend/test/runner_test.py +++ b/pypy/jit/backend/test/runner_test.py @@ -1680,7 +1680,7 @@ record = [] # S = lltype.GcStruct('S', ('tid', lltype.Signed)) - FUNC = self.FuncType([lltype.Ptr(S), lltype.Signed], lltype.Void) + FUNC = self.FuncType([lltype.Ptr(S), lltype.Ptr(S)], lltype.Void) func_ptr = llhelper(lltype.Ptr(FUNC), func_void) funcbox = self.get_funcbox(self.cpu, func_ptr) class WriteBarrierDescr(AbstractDescr): @@ -1699,12 +1699,48 @@ s = lltype.malloc(S) s.tid = value sgcref = lltype.cast_opaque_ptr(llmemory.GCREF, s) + t = lltype.malloc(S) + tgcref = lltype.cast_opaque_ptr(llmemory.GCREF, t) del record[:] self.execute_operation(rop.COND_CALL_GC_WB, - [BoxPtr(sgcref), ConstInt(-2121)], + [BoxPtr(sgcref), ConstPtr(tgcref)], 'void', descr=WriteBarrierDescr()) if cond: - assert record == [(s, -2121)] + assert record == [(s, t)] + else: + assert record == [] + + def test_cond_call_gc_wb_array(self): + def func_void(a, b): + record.append((a, b)) + record = [] + # + S = lltype.GcStruct('S', ('tid', lltype.Signed)) + FUNC = self.FuncType([lltype.Ptr(S), lltype.Signed], lltype.Void) + func_ptr = llhelper(lltype.Ptr(FUNC), func_void) + funcbox = self.get_funcbox(self.cpu, func_ptr) + class WriteBarrierDescr(AbstractDescr): + jit_wb_if_flag = 4096 + jit_wb_if_flag_byteofs = struct.pack("i", 4096).index('\x10') + jit_wb_if_flag_singlebyte = 0x10 + def get_write_barrier_from_array_fn(self, cpu): + return funcbox.getint() + # + for cond in [False, True]: + value = random.randrange(-sys.maxint, sys.maxint) + if cond: + value |= 4096 + else: + value &= ~4096 + s = lltype.malloc(S) + s.tid = value + sgcref = lltype.cast_opaque_ptr(llmemory.GCREF, s) + del record[:] + self.execute_operation(rop.COND_CALL_GC_WB, + [BoxPtr(sgcref), ConstInt(123)], + 'void', descr=WriteBarrierDescr()) + if cond: + assert record == [(s, 123)] else: assert record == [] diff --git a/pypy/jit/backend/x86/assembler.py b/pypy/jit/backend/x86/assembler.py --- a/pypy/jit/backend/x86/assembler.py +++ b/pypy/jit/backend/x86/assembler.py @@ -322,6 +322,7 @@ # for the duration of compiling one loop or a one bridge. clt = CompiledLoopToken(self.cpu, looptoken.number) + clt.allgcrefs = [] looptoken.compiled_loop_token = clt if not we_are_translated(): # Arguments should be unique @@ -335,7 +336,8 @@ operations = self._inject_debugging_code(looptoken, operations) regalloc = RegAlloc(self, self.cpu.translate_support_code) - arglocs, operations = regalloc.prepare_loop(inputargs, operations, looptoken) + arglocs, operations = regalloc.prepare_loop(inputargs, operations, + looptoken, clt.allgcrefs) looptoken._x86_arglocs = arglocs bootstrappos = self.mc.get_relative_pos() @@ -407,7 +409,8 @@ regalloc = RegAlloc(self, self.cpu.translate_support_code) fail_depths = faildescr._x86_current_depths operations = regalloc.prepare_bridge(fail_depths, inputargs, arglocs, - operations) + operations, + self.current_clt.allgcrefs) stackadjustpos = self._patchable_stackadjust() frame_depth, param_depth = self._assemble(regalloc, operations) @@ -499,9 +502,9 @@ funcname = op.getarg(0)._get_str() break else: - funcname = "<loop %d>" % len(self.loop_run_counters) - # invent the counter, so we don't get too confused - return funcname + funcname = '?' + return "%s (loop counter %d)" % (funcname, + len(self.loop_run_counters)) def _register_counter(self): if self._debug: @@ -2079,6 +2082,8 @@ # function remember_young_pointer() from the GC. The two arguments # to the call are in arglocs[:2]. The rest, arglocs[2:], contains # registers that need to be saved and restored across the call. + # If op.getarg(1) is a int, it is an array index and we must call + # instead remember_young_pointer_from_array(). descr = op.getdescr() if we_are_translated(): cls = self.cpu.gc_ll_descr.has_write_barrier_class() @@ -2110,13 +2115,19 @@ remap_frame_layout(self, arglocs[:2], [edi, esi], X86_64_SCRATCH_REG) + if op.getarg(1).type == INT: + func = descr.get_write_barrier_from_array_fn(self.cpu) + assert func != 0 + else: + func = descr.get_write_barrier_fn(self.cpu) + # misaligned stack in the call, but it's ok because the write barrier # is not going to call anything more. Also, this assumes that the # write barrier does not touch the xmm registers. (Slightly delicate # assumption, given that the write barrier can end up calling the # platform's malloc() from AddressStack.append(). XXX may need to # be done properly) - self.mc.CALL(imm(descr.get_write_barrier_fn(self.cpu))) + self.mc.CALL(imm(func)) if IS_X86_32: self.mc.ADD_ri(esp.value, 2*WORD) for i in range(2, len(arglocs)): diff --git a/pypy/jit/backend/x86/regalloc.py b/pypy/jit/backend/x86/regalloc.py --- a/pypy/jit/backend/x86/regalloc.py +++ b/pypy/jit/backend/x86/regalloc.py @@ -157,11 +157,12 @@ # to be read/used by the assembler too self.jump_target_descr = None - def _prepare(self, inputargs, operations): + def _prepare(self, inputargs, operations, allgcrefs): self.fm = X86FrameManager() self.param_depth = 0 cpu = self.assembler.cpu - operations = cpu.gc_ll_descr.rewrite_assembler(cpu, operations) + operations = cpu.gc_ll_descr.rewrite_assembler(cpu, operations, + allgcrefs) # compute longevity of variables longevity = self._compute_vars_longevity(inputargs, operations) self.longevity = longevity @@ -172,15 +173,16 @@ assembler = self.assembler) return operations - def prepare_loop(self, inputargs, operations, looptoken): - operations = self._prepare(inputargs, operations) + def prepare_loop(self, inputargs, operations, looptoken, allgcrefs): + operations = self._prepare(inputargs, operations, allgcrefs) jump = operations[-1] loop_consts = self._compute_loop_consts(inputargs, jump, looptoken) self.loop_consts = loop_consts return self._process_inputargs(inputargs), operations - def prepare_bridge(self, prev_depths, inputargs, arglocs, operations): - operations = self._prepare(inputargs, operations) + def prepare_bridge(self, prev_depths, inputargs, arglocs, operations, + allgcrefs): + operations = self._prepare(inputargs, operations, allgcrefs) self.loop_consts = {} self._update_bindings(arglocs, inputargs) self.fm.frame_depth = prev_depths[0] @@ -864,12 +866,12 @@ def consider_cond_call_gc_wb(self, op): assert op.result is None args = op.getarglist() - loc_newvalue = self.rm.make_sure_var_in_reg(op.getarg(1), args) - # ^^^ we force loc_newvalue in a reg (unless it's a Const), + loc_newvalue_or_index= self.rm.make_sure_var_in_reg(op.getarg(1), args) + # ^^^ we force loc_newvalue_or_index in a reg (unless it's a Const), # because it will be needed anyway by the following setfield_gc. # It avoids loading it twice from the memory. loc_base = self.rm.make_sure_var_in_reg(op.getarg(0), args) - arglocs = [loc_base, loc_newvalue] + arglocs = [loc_base, loc_newvalue_or_index] # add eax, ecx and edx as extra "arguments" to ensure they are # saved and restored. Fish in self.rm to know which of these # registers really need to be saved (a bit of a hack). Moreover, diff --git a/pypy/jit/backend/x86/test/test_gc_integration.py b/pypy/jit/backend/x86/test/test_gc_integration.py --- a/pypy/jit/backend/x86/test/test_gc_integration.py +++ b/pypy/jit/backend/x86/test/test_gc_integration.py @@ -16,7 +16,7 @@ from pypy.rpython.lltypesystem import lltype, llmemory, rffi from pypy.rpython.annlowlevel import llhelper from pypy.rpython.lltypesystem import rclass, rstr -from pypy.jit.backend.llsupport.gc import GcLLDescr_framework, GcRefList, GcPtrFieldDescr +from pypy.jit.backend.llsupport.gc import GcLLDescr_framework, GcPtrFieldDescr from pypy.jit.backend.x86.test.test_regalloc import MockAssembler from pypy.jit.backend.x86.test.test_regalloc import BaseTestRegalloc @@ -51,11 +51,9 @@ gcrootmap = MockGcRootMap() def initialize(self): - self.gcrefs = GcRefList() - self.gcrefs.initialize() - self.single_gcref_descr = GcPtrFieldDescr('', 0) + pass - replace_constptrs_with_getfield_raw = GcLLDescr_framework.replace_constptrs_with_getfield_raw.im_func + record_constptrs = GcLLDescr_framework.record_constptrs.im_func rewrite_assembler = GcLLDescr_framework.rewrite_assembler.im_func class TestRegallocDirectGcIntegration(object): diff --git a/pypy/jit/backend/x86/test/test_runner.py b/pypy/jit/backend/x86/test/test_runner.py --- a/pypy/jit/backend/x86/test/test_runner.py +++ b/pypy/jit/backend/x86/test/test_runner.py @@ -362,7 +362,7 @@ operations[3].setfailargs([i1]) self.cpu.compile_loop(inputargs, operations, looptoken) name, loopaddress, loopsize = agent.functions[0] - assert name == "Loop # 17: hello" + assert name == "Loop # 17: hello (loop counter 0)" assert loopaddress <= looptoken._x86_loop_code assert loopsize >= 40 # randomish number @@ -378,7 +378,7 @@ self.cpu.compile_bridge(faildescr1, [i1b], bridge, looptoken) name, address, size = agent.functions[1] - assert name == "Bridge # 0: bye" + assert name == "Bridge # 0: bye (loop counter 1)" # Would be exactly ==, but there are some guard failure recovery # stubs in-between assert address >= loopaddress + loopsize diff --git a/pypy/jit/backend/x86/test/test_zrpy_gc.py b/pypy/jit/backend/x86/test/test_zrpy_gc.py --- a/pypy/jit/backend/x86/test/test_zrpy_gc.py +++ b/pypy/jit/backend/x86/test/test_zrpy_gc.py @@ -1,8 +1,7 @@ """ -This is a test that translates a complete JIT to C and runs it. It is -not testing much, expect that it basically works. What it *is* testing, -however, is the correct handling of GC, i.e. if objects are freed as -soon as possible (at least in a simple case). +This is a test that translates a complete JIT together with a GC and runs it. +It is testing that the GC-dependent aspects basically work, mostly the mallocs +and the various cases of write barrier. """ import weakref @@ -14,7 +13,7 @@ from pypy.rlib.jit import JitDriver, dont_look_inside from pypy.rlib.jit import purefunction, unroll_safe from pypy.jit.backend.x86.runner import CPU386 -from pypy.jit.backend.llsupport.gc import GcRefList, GcRootMap_asmgcc +from pypy.jit.backend.llsupport.gc import GcRootMap_asmgcc from pypy.jit.backend.llsupport.gc import GcLLDescr_framework from pypy.tool.udir import udir from pypy.jit.backend.x86.arch import IS_X86_64 @@ -456,6 +455,73 @@ def test_compile_framework_7(self): self.run('compile_framework_7') + def define_compile_framework_8(cls): + # Array of pointers, of unknown length (test write_barrier_from_array) + def before(n, x): + return n, x, None, None, None, None, None, None, None, None, [X(123)], None + def f(n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s): + if n < 1900: + check(l[0].x == 123) + l = [None] * (16 + (n & 7)) + l[0] = X(123) + l[1] = X(n) + l[2] = X(n+10) + l[3] = X(n+20) + l[4] = X(n+30) + l[5] = X(n+40) + l[6] = X(n+50) + l[7] = X(n+60) + l[8] = X(n+70) + l[9] = X(n+80) + l[10] = X(n+90) + l[11] = X(n+100) + l[12] = X(n+110) + l[13] = X(n+120) + l[14] = X(n+130) + l[15] = X(n+140) + if n < 1800: + check(len(l) == 16 + (n & 7)) + check(l[0].x == 123) + check(l[1].x == n) + check(l[2].x == n+10) + check(l[3].x == n+20) + check(l[4].x == n+30) + check(l[5].x == n+40) + check(l[6].x == n+50) + check(l[7].x == n+60) + check(l[8].x == n+70) + check(l[9].x == n+80) + check(l[10].x == n+90) + check(l[11].x == n+100) + check(l[12].x == n+110) + check(l[13].x == n+120) + check(l[14].x == n+130) + check(l[15].x == n+140) + n -= x.foo + return n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s + def after(n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s): + check(len(l) >= 16) + check(l[0].x == 123) + check(l[1].x == 2) + check(l[2].x == 12) + check(l[3].x == 22) + check(l[4].x == 32) + check(l[5].x == 42) + check(l[6].x == 52) + check(l[7].x == 62) + check(l[8].x == 72) + check(l[9].x == 82) + check(l[10].x == 92) + check(l[11].x == 102) + check(l[12].x == 112) + check(l[13].x == 122) + check(l[14].x == 132) + check(l[15].x == 142) + return before, f, after + + def test_compile_framework_8(self): + self.run('compile_framework_8') + def define_compile_framework_external_exception_handling(cls): def before(n, x): x = X(0) diff --git a/pypy/jit/metainterp/resoperation.py b/pypy/jit/metainterp/resoperation.py --- a/pypy/jit/metainterp/resoperation.py +++ b/pypy/jit/metainterp/resoperation.py @@ -471,7 +471,8 @@ 'STRSETITEM/3', 'UNICODESETITEM/3', #'RUNTIMENEW/1', # ootype operation - 'COND_CALL_GC_WB/2d', # [objptr, newvalue] (for the write barrier) + 'COND_CALL_GC_WB/2d', # [objptr, newvalue] or [arrayptr, index] + # (for the write barrier, latter is in an array) 'DEBUG_MERGE_POINT/2', # debugging only 'JIT_DEBUG/*', # debugging only 'VIRTUAL_REF_FINISH/2', # removed before it's passed to the backend diff --git a/pypy/rlib/rgc.py b/pypy/rlib/rgc.py --- a/pypy/rlib/rgc.py +++ b/pypy/rlib/rgc.py @@ -191,6 +191,21 @@ hop.exception_cannot_occur() return hop.genop('gc_can_move', hop.args_v, resulttype=hop.r_result) +def _make_sure_does_not_move(p): + """'p' is a non-null GC object. This (tries to) make sure that the + object does not move any more, by forcing collections if needed. + Warning: should ideally only be used with the minimark GC, and only + on objects that are already a bit old, so have a chance to be + already non-movable.""" + if not we_are_translated(): + return + i = 0 + while can_move(p): + if i > 6: + raise NotImplementedError("can't make object non-movable!") + collect(i) + i += 1 + def _heap_stats(): raise NotImplementedError # can't be run directly diff --git a/pypy/rpython/memory/gc/minimark.py b/pypy/rpython/memory/gc/minimark.py --- a/pypy/rpython/memory/gc/minimark.py +++ b/pypy/rpython/memory/gc/minimark.py @@ -1020,6 +1020,7 @@ objhdr.tid |= GCFLAG_CARDS_SET remember_young_pointer_from_array._dont_inline_ = True + assert self.card_page_indices > 0 self.remember_young_pointer_from_array = ( remember_young_pointer_from_array) diff --git a/pypy/rpython/memory/gctransform/framework.py b/pypy/rpython/memory/gctransform/framework.py --- a/pypy/rpython/memory/gctransform/framework.py +++ b/pypy/rpython/memory/gctransform/framework.py @@ -860,9 +860,9 @@ def gct_get_write_barrier_from_array_failing_case(self, hop): op = hop.spaceop - hop.genop("same_as", - [self.write_barrier_from_array_failing_case_ptr], - resultvar=op.result) + v = getattr(self, 'write_barrier_from_array_failing_case_ptr', + lltype.nullptr(op.result.concretetype.TO)) + hop.genop("same_as", [v], resultvar=op.result) def gct_zero_gc_pointers_inside(self, hop): if not self.malloc_zero_filled: _______________________________________________ pypy-commit mailing list pypy-commit@python.org http://mail.python.org/mailman/listinfo/pypy-commit