Author: Armin Rigo <ar...@tunes.org> Branch: Changeset: r45921:b3db617a2463 Date: 2011-07-23 22:34 +0200 http://bitbucket.org/pypy/pypy/changeset/b3db617a2463/
Log: merge heads diff --git a/pypy/jit/backend/llsupport/gc.py b/pypy/jit/backend/llsupport/gc.py --- a/pypy/jit/backend/llsupport/gc.py +++ b/pypy/jit/backend/llsupport/gc.py @@ -453,21 +453,33 @@ class WriteBarrierDescr(AbstractDescr): def __init__(self, gc_ll_descr): + GCClass = gc_ll_descr.GCClass self.llop1 = gc_ll_descr.llop1 self.WB_FUNCPTR = gc_ll_descr.WB_FUNCPTR self.WB_ARRAY_FUNCPTR = gc_ll_descr.WB_ARRAY_FUNCPTR - self.fielddescr_tid = get_field_descr(gc_ll_descr, - gc_ll_descr.GCClass.HDR, 'tid') - self.jit_wb_if_flag = gc_ll_descr.GCClass.JIT_WB_IF_FLAG - # if convenient for the backend, we also compute the info about + self.fielddescr_tid = get_field_descr(gc_ll_descr, GCClass.HDR, 'tid') + # + self.jit_wb_if_flag = GCClass.JIT_WB_IF_FLAG + self.jit_wb_if_flag_byteofs, self.jit_wb_if_flag_singlebyte = ( + self.extract_flag_byte(self.jit_wb_if_flag)) + # + if hasattr(GCClass, 'JIT_WB_CARDS_SET'): + self.jit_wb_cards_set = GCClass.JIT_WB_CARDS_SET + self.jit_wb_card_page_shift = GCClass.JIT_WB_CARD_PAGE_SHIFT + self.jit_wb_cards_set_byteofs, self.jit_wb_cards_set_singlebyte = ( + self.extract_flag_byte(self.jit_wb_cards_set)) + else: + self.jit_wb_cards_set = 0 + + def extract_flag_byte(self, flag_word): + # if convenient for the backend, we compute the info about # the flag as (byte-offset, single-byte-flag). 
import struct - value = struct.pack("l", self.jit_wb_if_flag) + value = struct.pack("l", flag_word) assert value.count('\x00') == len(value) - 1 # only one byte is != 0 i = 0 while value[i] == '\x00': i += 1 - self.jit_wb_if_flag_byteofs = i - self.jit_wb_if_flag_singlebyte = struct.unpack('b', value[i])[0] + return (i, struct.unpack('b', value[i])[0]) def get_write_barrier_fn(self, cpu): llop1 = self.llop1 diff --git a/pypy/jit/backend/test/runner_test.py b/pypy/jit/backend/test/runner_test.py --- a/pypy/jit/backend/test/runner_test.py +++ b/pypy/jit/backend/test/runner_test.py @@ -1707,6 +1707,7 @@ jit_wb_if_flag = 4096 jit_wb_if_flag_byteofs = struct.pack("i", 4096).index('\x10') jit_wb_if_flag_singlebyte = 0x10 + jit_wb_cards_set = 0 def get_write_barrier_from_array_fn(self, cpu): return funcbox.getint() # @@ -1728,6 +1729,72 @@ else: assert record == [] + def test_cond_call_gc_wb_array_card_marking_fast_path(self): + def func_void(a, b, c): + record.append((a, b, c)) + record = [] + # + S = lltype.Struct('S', ('tid', lltype.Signed)) + S_WITH_CARDS = lltype.Struct('S_WITH_CARDS', + ('card0', lltype.Char), + ('card1', lltype.Char), + ('card2', lltype.Char), + ('card3', lltype.Char), + ('card4', lltype.Char), + ('card5', lltype.Char), + ('card6', lltype.Char), + ('card7', lltype.Char), + ('data', S)) + FUNC = self.FuncType([lltype.Ptr(S), lltype.Signed, lltype.Ptr(S)], + lltype.Void) + func_ptr = llhelper(lltype.Ptr(FUNC), func_void) + funcbox = self.get_funcbox(self.cpu, func_ptr) + class WriteBarrierDescr(AbstractDescr): + jit_wb_if_flag = 4096 + jit_wb_if_flag_byteofs = struct.pack("i", 4096).index('\x10') + jit_wb_if_flag_singlebyte = 0x10 + jit_wb_cards_set = 8192 + jit_wb_cards_set_byteofs = struct.pack("i", 8192).index('\x20') + jit_wb_cards_set_singlebyte = 0x20 + jit_wb_card_page_shift = 7 + def get_write_barrier_from_array_fn(self, cpu): + return funcbox.getint() + # + for BoxIndexCls in [BoxInt, ConstInt]: + for cond in [False, True]: + print + print 
'_'*79 + print 'BoxIndexCls =', BoxIndexCls + print 'JIT_WB_CARDS_SET =', cond + print + value = random.randrange(-sys.maxint, sys.maxint) + value |= 4096 + if cond: + value |= 8192 + else: + value &= ~8192 + s = lltype.malloc(S_WITH_CARDS, immortal=True, zero=True) + s.data.tid = value + sgcref = rffi.cast(llmemory.GCREF, s.data) + del record[:] + box_index = BoxIndexCls((9<<7) + 17) + self.execute_operation(rop.COND_CALL_GC_WB_ARRAY, + [BoxPtr(sgcref), box_index, BoxPtr(sgcref)], + 'void', descr=WriteBarrierDescr()) + if cond: + assert record == [] + assert s.card6 == '\x02' + else: + assert record == [(s.data, (9<<7) + 17, s.data)] + assert s.card6 == '\x00' + assert s.card0 == '\x00' + assert s.card1 == '\x00' + assert s.card2 == '\x00' + assert s.card3 == '\x00' + assert s.card4 == '\x00' + assert s.card5 == '\x00' + assert s.card7 == '\x00' + def test_force_operations_returning_void(self): values = [] def maybe_force(token, flag): diff --git a/pypy/jit/backend/x86/assembler.py b/pypy/jit/backend/x86/assembler.py --- a/pypy/jit/backend/x86/assembler.py +++ b/pypy/jit/backend/x86/assembler.py @@ -2246,10 +2246,12 @@ if opnum == rop.COND_CALL_GC_WB: N = 2 func = descr.get_write_barrier_fn(self.cpu) + card_marking = False elif opnum == rop.COND_CALL_GC_WB_ARRAY: N = 3 func = descr.get_write_barrier_from_array_fn(self.cpu) assert func != 0 + card_marking = descr.jit_wb_cards_set != 0 else: raise AssertionError(opnum) # @@ -2258,6 +2260,18 @@ imm(descr.jit_wb_if_flag_singlebyte)) self.mc.J_il8(rx86.Conditions['Z'], 0) # patched later jz_location = self.mc.get_relative_pos() + + # for cond_call_gc_wb_array, also add another fast path: + # if GCFLAG_CARDS_SET, then we can just set one bit and be done + if card_marking: + self.mc.TEST8(addr_add_const(loc_base, + descr.jit_wb_cards_set_byteofs), + imm(descr.jit_wb_cards_set_singlebyte)) + self.mc.J_il8(rx86.Conditions['NZ'], 0) # patched later + jnz_location = self.mc.get_relative_pos() + else: + jnz_location = 0 + # 
the following is supposed to be the slow path, so whenever possible # we choose the most compact encoding over the most efficient one. if IS_X86_32: @@ -2297,6 +2311,43 @@ loc = arglocs[i] assert isinstance(loc, RegLoc) self.mc.POP_r(loc.value) + + # if GCFLAG_CARDS_SET, then we can do the whole thing that would + # be done in the CALL above with just four instructions, so here + # is an inline copy of them + if card_marking: + self.mc.JMP_l8(0) # jump to the exit, patched later + jmp_location = self.mc.get_relative_pos() + # patch the JNZ above + offset = self.mc.get_relative_pos() - jnz_location + assert 0 < offset <= 127 + self.mc.overwrite(jnz_location-1, chr(offset)) + # + loc_index = arglocs[1] + if isinstance(loc_index, RegLoc): + # choose a scratch register + tmp1 = loc_index + self.mc.PUSH_r(tmp1.value) + # SHR tmp, card_page_shift + self.mc.SHR_ri(tmp1.value, descr.jit_wb_card_page_shift) + # XOR tmp, -8 + self.mc.XOR_ri(tmp1.value, -8) + # BTS [loc_base], tmp + self.mc.BTS(addr_add_const(loc_base, 0), tmp1) + # done + self.mc.POP_r(tmp1.value) + elif isinstance(loc_index, ImmedLoc): + byte_index = loc_index.value >> descr.jit_wb_card_page_shift + byte_ofs = ~(byte_index >> 3) + byte_val = 1 << (byte_index & 7) + self.mc.OR8(addr_add_const(loc_base, byte_ofs), imm(byte_val)) + else: + raise AssertionError("index is neither RegLoc nor ImmedLoc") + # patch the JMP above + offset = self.mc.get_relative_pos() - jmp_location + assert 0 < offset <= 127 + self.mc.overwrite(jmp_location-1, chr(offset)) + # # patch the JZ above offset = self.mc.get_relative_pos() - jz_location assert 0 < offset <= 127 diff --git a/pypy/jit/backend/x86/regloc.py b/pypy/jit/backend/x86/regloc.py --- a/pypy/jit/backend/x86/regloc.py +++ b/pypy/jit/backend/x86/regloc.py @@ -476,6 +476,7 @@ AND = _binaryop('AND') OR = _binaryop('OR') + OR8 = _binaryop('OR8') XOR = _binaryop('XOR') NOT = _unaryop('NOT') SHL = _binaryop('SHL') @@ -483,6 +484,7 @@ SAR = _binaryop('SAR') TEST = 
_binaryop('TEST') TEST8 = _binaryop('TEST8') + BTS = _binaryop('BTS') ADD = _binaryop('ADD') SUB = _binaryop('SUB') diff --git a/pypy/jit/backend/x86/rx86.py b/pypy/jit/backend/x86/rx86.py --- a/pypy/jit/backend/x86/rx86.py +++ b/pypy/jit/backend/x86/rx86.py @@ -496,6 +496,10 @@ AND8_rr = insn(rex_fw, '\x20', byte_register(1), byte_register(2,8), '\xC0') OR8_rr = insn(rex_fw, '\x08', byte_register(1), byte_register(2,8), '\xC0') + OR8_mi = insn(rex_fw, '\x80', orbyte(1<<3), mem_reg_plus_const(1), + immediate(2, 'b')) + OR8_ji = insn(rex_fw, '\x80', orbyte(1<<3), abs_, immediate(1), + immediate(2, 'b')) NEG_r = insn(rex_w, '\xF7', register(1), '\xD8') @@ -565,6 +569,9 @@ TEST8_ji = insn(rex_nw, '\xF6', orbyte(0<<3), abs_, immediate(1), immediate(2, 'b')) TEST_rr = insn(rex_w, '\x85', register(2,8), register(1), '\xC0') + BTS_mr = insn(rex_w, '\x0F\xAB', register(2,8), mem_reg_plus_const(1)) + BTS_jr = insn(rex_w, '\x0F\xAB', register(2,8), abs_, immediate(1)) + # x87 instructions FSTP_b = insn('\xDD', orbyte(3<<3), stack_bp(1)) diff --git a/pypy/jit/backend/x86/test/test_zrpy_gc.py b/pypy/jit/backend/x86/test/test_zrpy_gc.py --- a/pypy/jit/backend/x86/test/test_zrpy_gc.py +++ b/pypy/jit/backend/x86/test/test_zrpy_gc.py @@ -524,6 +524,76 @@ def test_compile_framework_8(self): self.run('compile_framework_8') + def define_compile_framework_9(cls): + # Like compile_framework_8, but with variable indexes and large + # arrays, testing the card_marking case + def before(n, x): + return n, x, None, None, None, None, None, None, None, None, [X(123)], None + def f(n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s): + if n < 1900: + check(l[0].x == 123) + num = 512 + (n & 7) + l = [None] * num + l[0] = X(123) + l[1] = X(n) + l[2] = X(n+10) + l[3] = X(n+20) + l[4] = X(n+30) + l[5] = X(n+40) + l[6] = X(n+50) + l[7] = X(n+60) + l[num-8] = X(n+70) + l[num-9] = X(n+80) + l[num-10] = X(n+90) + l[num-11] = X(n+100) + l[-12] = X(n+110) + l[-13] = X(n+120) + l[-14] = X(n+130) + l[-15] = 
X(n+140) + if n < 1800: + num = 512 + (n & 7) + check(len(l) == num) + check(l[0].x == 123) + check(l[1].x == n) + check(l[2].x == n+10) + check(l[3].x == n+20) + check(l[4].x == n+30) + check(l[5].x == n+40) + check(l[6].x == n+50) + check(l[7].x == n+60) + check(l[num-8].x == n+70) + check(l[num-9].x == n+80) + check(l[num-10].x == n+90) + check(l[num-11].x == n+100) + check(l[-12].x == n+110) + check(l[-13].x == n+120) + check(l[-14].x == n+130) + check(l[-15].x == n+140) + n -= x.foo + return n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s + def after(n, x, x0, x1, x2, x3, x4, x5, x6, x7, l, s): + check(len(l) >= 512) + check(l[0].x == 123) + check(l[1].x == 2) + check(l[2].x == 12) + check(l[3].x == 22) + check(l[4].x == 32) + check(l[5].x == 42) + check(l[6].x == 52) + check(l[7].x == 62) + check(l[-8].x == 72) + check(l[-9].x == 82) + check(l[-10].x == 92) + check(l[-11].x == 102) + check(l[-12].x == 112) + check(l[-13].x == 122) + check(l[-14].x == 132) + check(l[-15].x == 142) + return before, f, after + + def test_compile_framework_9(self): + self.run('compile_framework_9') + def define_compile_framework_external_exception_handling(cls): def before(n, x): x = X(0) diff --git a/pypy/rpython/memory/gc/minimark.py b/pypy/rpython/memory/gc/minimark.py --- a/pypy/rpython/memory/gc/minimark.py +++ b/pypy/rpython/memory/gc/minimark.py @@ -924,6 +924,20 @@ # "if addr_struct.int0 & JIT_WB_IF_FLAG: remember_young_pointer()") JIT_WB_IF_FLAG = GCFLAG_TRACK_YOUNG_PTRS + # for the JIT to generate custom code corresponding to the array + # write barrier for the simplest case of cards. 
If JIT_WB_CARDS_SET + # is already set on an object, it will execute code like this: + # MOV eax, index + # SHR eax, JIT_WB_CARD_PAGE_SHIFT + # XOR eax, -8 + # BTS [object], eax + if TRANSLATION_PARAMS['card_page_indices'] > 0: + JIT_WB_CARDS_SET = GCFLAG_CARDS_SET + JIT_WB_CARD_PAGE_SHIFT = 1 + while ((1 << JIT_WB_CARD_PAGE_SHIFT) != + TRANSLATION_PARAMS['card_page_indices']): + JIT_WB_CARD_PAGE_SHIFT += 1 + @classmethod def JIT_max_size_of_young_obj(cls): return cls.TRANSLATION_PARAMS['large_object'] _______________________________________________ pypy-commit mailing list pypy-commit@python.org http://mail.python.org/mailman/listinfo/pypy-commit