Author: Armin Rigo <[email protected]>
Branch: stmgc-c7
Changeset: r72294:a006a6263e15
Date: 2014-06-30 18:41 +0200
http://bitbucket.org/pypy/pypy/changeset/a006a6263e15/
Log: in-progress
diff --git a/rpython/jit/backend/llsupport/gc.py
b/rpython/jit/backend/llsupport/gc.py
--- a/rpython/jit/backend/llsupport/gc.py
+++ b/rpython/jit/backend/llsupport/gc.py
@@ -302,9 +302,11 @@
self.extract_flag_byte(self.jit_wb_cards_set))
#
# the x86 backend uses the following "accidental" facts to
- # avoid one instruction:
- assert self.jit_wb_cards_set_byteofs == self.jit_wb_if_flag_byteofs
- assert self.jit_wb_cards_set_singlebyte == -0x80
+ # avoid one instruction (not with stm):
+ if not gc_ll_descr.stm:
+ assert (self.jit_wb_cards_set_byteofs ==
+ self.jit_wb_if_flag_byteofs)
+ assert self.jit_wb_cards_set_singlebyte == -0x80
else:
self.jit_wb_cards_set = 0
diff --git a/rpython/jit/backend/llsupport/rewrite.py
b/rpython/jit/backend/llsupport/rewrite.py
--- a/rpython/jit/backend/llsupport/rewrite.py
+++ b/rpython/jit/backend/llsupport/rewrite.py
@@ -416,18 +416,14 @@
self.gen_write_barrier(val, op.stm_location)
self.newops.append(op)
- def handle_write_barrier_setinteriorfield(self, op):
- val = op.getarg(0)
- if self.must_apply_write_barrier(val, op.getarg(2)):
- self.gen_write_barrier(val, op.stm_location)
- self.newops.append(op)
-
def handle_write_barrier_setarrayitem(self, op):
val = op.getarg(0)
if self.must_apply_write_barrier(val, op.getarg(2)):
self.gen_write_barrier_array(val, op.getarg(1), op.stm_location)
self.newops.append(op)
+ handle_write_barrier_setinteriorfield = handle_write_barrier_setarrayitem
+
def gen_write_barrier(self, v_base, stm_location):
write_barrier_descr = self.gc_ll_descr.write_barrier_descr
args = [v_base]
diff --git a/rpython/jit/backend/llsupport/test/test_rewrite.py
b/rpython/jit/backend/llsupport/test/test_rewrite.py
--- a/rpython/jit/backend/llsupport/test/test_rewrite.py
+++ b/rpython/jit/backend/llsupport/test/test_rewrite.py
@@ -678,7 +678,7 @@
jump(p1, p2)
""", """
[p1, p2]
- cond_call_gc_wb(p1, descr=wbdescr)
+ cond_call_gc_wb_array(p1, 0, descr=wbdescr)
setinteriorfield_gc(p1, 0, p2, descr=interiorzdescr)
jump(p1, p2)
""", interiorzdescr=interiorzdescr)
diff --git a/rpython/jit/backend/llsupport/test/test_stmrewrite.py
b/rpython/jit/backend/llsupport/test/test_stmrewrite.py
--- a/rpython/jit/backend/llsupport/test/test_stmrewrite.py
+++ b/rpython/jit/backend/llsupport/test/test_stmrewrite.py
@@ -51,7 +51,7 @@
self.gc_ll_descr = GcLLDescr_framework(gcdescr, None, None, None,
really_not_translated=True)
self.gc_ll_descr.write_barrier_descr.has_write_barrier_from_array = (
- lambda cpu: False) # for now
+ lambda cpu: True)
self.gc_ll_descr.minimal_size_in_nursery = 16
#
class FakeCPU(BaseFakeCPU):
@@ -515,9 +515,9 @@
jump()
""", """
[p1, i1, p2, p3, i3, p4]
- cond_call_gc_wb(p1, descr=wbdescr)
+ cond_call_gc_wb_array(p1, i1, descr=wbdescr)
setarrayitem_gc(p1, i1, p2, descr=adescr)
- cond_call_gc_wb(p3, descr=wbdescr)
+ cond_call_gc_wb_array(p3, i3, descr=wbdescr)
setarrayitem_gc(p3, i3, p4, descr=adescr)
jump()
@@ -532,9 +532,10 @@
jump()
""", """
[p1, p2, i2, p3, i3]
- cond_call_gc_wb(p1, descr=wbdescr)
+ cond_call_gc_wb_array(p1, i2, descr=wbdescr)
setarrayitem_gc(p1, i2, p2, descr=adescr)
i4 = read_timestamp()
+ cond_call_gc_wb_array(p1, i3, descr=wbdescr)
setarrayitem_gc(p1, i3, p3, descr=adescr)
jump()
@@ -549,9 +550,10 @@
jump()
""", """
[p1, p2, i2, p3, i3]
- cond_call_gc_wb(p1, descr=wbdescr)
+ cond_call_gc_wb_array(p1, i2, descr=wbdescr)
setinteriorfield_gc(p1, i2, p2, descr=intzdescr)
i4 = read_timestamp()
+ cond_call_gc_wb_array(p1, i3, descr=wbdescr)
setinteriorfield_gc(p1, i3, p3, descr=intzdescr)
jump()
@@ -1115,7 +1117,7 @@
setfield_gc(p1, 8111, descr=tiddescr)
setfield_gc(p1, 5, descr=clendescr)
label(p1, i2, p3)
- cond_call_gc_wb(p1, descr=wbdescr)
+ cond_call_gc_wb_array(p1, i2, descr=wbdescr)
setarrayitem_gc(p1, i2, p3, descr=cdescr)
""")
diff --git a/rpython/jit/backend/x86/assembler.py
b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -451,13 +451,17 @@
# A final TEST8 before the RET, for the caller. Careful to
# not follow this instruction with another one that changes
# the status of the CPU flags!
- if IS_X86_32:
- mc.MOV_rs(eax.value, 3*WORD)
+ if stm:
+ mc.TEST8_rr(eax.value | BYTE_REG_FLAG,
+ eax.value | BYTE_REG_FLAG)
else:
- mc.MOV_rs(eax.value, WORD)
- mc.TEST8(addr_add_const(self.SEGMENT_GC, eax,
- descr.jit_wb_if_flag_byteofs),
- imm(-0x80))
+ if IS_X86_32:
+ mc.MOV_rs(eax.value, 3*WORD)
+ else:
+ mc.MOV_rs(eax.value, WORD)
+ mc.TEST8(addr_add_const(self.SEGMENT_GC, eax,
+ descr.jit_wb_if_flag_byteofs),
+ imm(-0x80))
#
if not for_frame:
@@ -2218,15 +2222,17 @@
cls = self.cpu.gc_ll_descr.has_write_barrier_class()
assert cls is not None and isinstance(descr, cls)
#
+ stm = self.cpu.gc_ll_descr.stm
card_marking = False
mask = descr.jit_wb_if_flag_singlebyte
if array and descr.jit_wb_cards_set != 0:
- # assumptions the rest of the function depends on:
- assert (descr.jit_wb_cards_set_byteofs ==
- descr.jit_wb_if_flag_byteofs)
- assert descr.jit_wb_cards_set_singlebyte == -0x80
+ if not stm:
+ # assumptions the rest of the function depends on:
+ assert (descr.jit_wb_cards_set_byteofs ==
+ descr.jit_wb_if_flag_byteofs)
+ assert descr.jit_wb_cards_set_singlebyte == -0x80
+ mask = descr.jit_wb_if_flag_singlebyte | -0x80
card_marking = True
- mask = descr.jit_wb_if_flag_singlebyte | -0x80
#
loc_base = arglocs[0]
if is_frame:
@@ -2242,10 +2248,18 @@
# for cond_call_gc_wb_array, also add another fast path:
# if GCFLAG_CARDS_SET, then we can just set one bit and be done
if card_marking:
- # GCFLAG_CARDS_SET is in this byte at 0x80, so this fact can
- # been checked by the status flags of the previous TEST8
- mc.J_il8(rx86.Conditions['S'], 0) # patched later
- js_location = mc.get_relative_pos()
+ if stm:
+ loc2 = addr_add_const(self.SEGMENT_GC, loc_base,
+ descr.jit_wb_cards_set_byteofs)
+ mask2 = descr.jit_wb_cards_set_singlebyte
+ mc.TEST8(loc2, imm(mask2))
+ mc.J_il8(rx86.Conditions['NZ'], 0) # patched later
+ js_location = mc.get_relative_pos()
+ else:
+ # GCFLAG_CARDS_SET is in this byte at 0x80, so this fact can
be checked by the status flags of the previous TEST8
+ mc.J_il8(rx86.Conditions['S'], 0) # patched later
+ js_location = mc.get_relative_pos()
else:
js_location = 0
@@ -2266,7 +2280,7 @@
#
if not is_frame:
mc.PUSH(loc_base)
- if self.cpu.gc_ll_descr.stm:
+ if stm:
# get the num and ref components of the stm_location, and
# push them to the stack. It's 16 bytes, so alignment is
# still ok. The one or three words pushed here are removed
@@ -2286,7 +2300,10 @@
# The helper ends again with a check of the flag in the object.
# So here, we can simply write again a 'JNS', which will be
# taken if GCFLAG_CARDS_SET is still not set.
- mc.J_il8(rx86.Conditions['NS'], 0) # patched later
+ if stm:
+ mc.J_il8(rx86.Conditions['Z'], 0) # patched later
+ else:
+ mc.J_il8(rx86.Conditions['NS'], 0) # patched later
jns_location = mc.get_relative_pos()
#
# patch the JS above
@@ -2297,7 +2314,56 @@
# case GCFLAG_CARDS_SET: emit a few instructions to do
# directly the card flag setting
loc_index = arglocs[1]
- if isinstance(loc_index, RegLoc):
+
+ if stm:
+ # must write the value CARD_MARKED into the byte at:
+ # write_locks_base + (object >> 4) + (index / CARD_SIZE)
+ #
+ write_locks_base = rstm.adr__stm_write_slowpath_card_extra_base
+ if rstm.CARD_SIZE == 32:
+ card_bits = 5
+ elif rstm.CARD_SIZE == 64:
+ card_bits = 6
+ elif rstm.CARD_SIZE == 128:
+ card_bits = 7
+ else:
+ raise AssertionError("CARD_SIZE should be 32/64/128")
+ #
+ # idea: mov r11, loc_base # the object
+ # and r11, ~15 # align
+ # lea r11, [loc_index + r11<<(card_bits-4)]
+ # shr r11, card_bits
+ # mov [r11 + write_locks_base], card_marked
+ r11 = X86_64_SCRATCH_REG
+ if isinstance(loc_index, RegLoc):
+ if isinstance(loc_base, RegLoc):
+ mc.MOV_rr(r11.value, loc_base.value)
+ mc.AND_ri(r11.value, ~15)
+ else:
+ assert isinstance(loc_base, ImmedLoc)
+ mc.MOV_ri(r11.value, loc_base.value & ~15) # 32/64bit
+ mc.LEA_ra(r11.value, (self.SEGMENT_NO,
+ loc_index.value,
+ r11.value,
+ card_bits - 4,
+ 0))
+ mc.SHR_ri(r11.value, card_bits)
+ else:
+ # XXX these cases could be slightly more optimized
+ assert isinstance(loc_index, ImmedLoc)
+ cardindex = loc_index.value >> card_bits
+ if isinstance(loc_base, RegLoc):
+ mc.MOV_ri(r11.value, cardindex << 4) # 32/64bit
+ mc.ADD_rr(r11.value, loc_base.value)
+ mc.SHR_ri(r11.value, 4)
+ else:
+ mc.MOV_ri(r11.value, cardindex + (loc_base.value >> 4))
+ #
+ assert rx86.fits_in_32bits(write_locks_base), "XXX"
+ mc.MOV8_mi((self.SEGMENT_NO, r11.value, write_locks_base),
+ rstm.CARD_MARKED)
+
+ elif isinstance(loc_index, RegLoc):
if IS_X86_64 and isinstance(loc_base, RegLoc):
# copy loc_index into r11
tmp1 = X86_64_SCRATCH_REG
diff --git a/rpython/memory/gc/stmgc.py b/rpython/memory/gc/stmgc.py
--- a/rpython/memory/gc/stmgc.py
+++ b/rpython/memory/gc/stmgc.py
@@ -34,6 +34,7 @@
VISIT_FPTR = lltype.Ptr(lltype.FuncType([llmemory.Address], lltype.Void))
JIT_WB_IF_FLAG = 0x01 # value of _STM_GCFLAG_WRITE_BARRIER
+ JIT_WB_CARDS_SET = 0x08 # value of _STM_GCFLAG_CARDS_SET
stm_fast_alloc = 66*1024 # value of _STM_FAST_ALLOC in stmgc.h
minimal_size_in_nursery = 16 # hard-coded lower limit
diff --git a/rpython/memory/gctransform/stmframework.py
b/rpython/memory/gctransform/stmframework.py
--- a/rpython/memory/gctransform/stmframework.py
+++ b/rpython/memory/gctransform/stmframework.py
@@ -141,6 +141,12 @@
lltype.Signed, rstm.adr_write_slowpath)
hop.genop("cast_int_to_ptr", [c_write_slowpath], resultvar=op.result)
+ def gct_get_write_barrier_from_array_failing_case(self, hop):
+ op = hop.spaceop
+ c_write_slowpath = rmodel.inputconst(
+ lltype.Signed, rstm.adr_write_slowpath_card_extra)
+ hop.genop("cast_int_to_ptr", [c_write_slowpath], resultvar=op.result)
+
def gct_gc_can_move(self, hop):
hop.rename('stm_can_move')
diff --git a/rpython/rlib/rstm.py b/rpython/rlib/rstm.py
--- a/rpython/rlib/rstm.py
+++ b/rpython/rlib/rstm.py
@@ -25,6 +25,12 @@
adr_segment_base = (
CFlexSymbolic('((long)&STM_SEGMENT->segment_base)'))
adr_write_slowpath = CFlexSymbolic('((long)&_stm_write_slowpath)')
+adr_write_slowpath_card_extra = (
+ CFlexSymbolic('((long)&_stm_write_slowpath_card_extra)'))
+adr__stm_write_slowpath_card_extra_base = (
+ CFlexSymbolic('((long)&_stm_write_slowpath_card_extra_base)'))
+CARD_MARKED = CFlexSymbolic('_STM_CARD_MARKED')
+CARD_SIZE = CFlexSymbolic('_STM_CARD_SIZE')
adr__pypy_stm_become_inevitable = (
CFlexSymbolic('((long)&_pypy_stm_become_inevitable)'))
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit