Author: Remi Meier <[email protected]>
Branch: stmgc-c4
Changeset: r65682:f819be0d01ca
Date: 2013-07-26 11:20 +0200
http://bitbucket.org/pypy/pypy/changeset/f819be0d01ca/
Log: Add fastpaths for read/write barriers for x64. The emitted asm is
     still seriously non-optimal in the fastpath.
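
For readers skimming the diff: below is a rough Python sketch of the two
conditions the new fast paths test, following the comments in the assembler
hunk. The attribute names mirror the C object header (h_revision, h_tid);
fxcache_lookup() stands in for the C macro FXCACHE_AT() and is purely
illustrative, not part of this commit.

    # Illustrative pseudocode only -- mirrors the comments in the diff below.

    def read_barrier_needs_slowpath(obj, stm_private_rev_num, fxcache_lookup):
        # read barrier: take the slow path only if the object is not already
        # private AND it misses the per-thread read-barrier cache
        return (obj.h_revision != stm_private_rev_num
                and fxcache_lookup(obj) is not obj)

    def write_barrier_needs_slowpath(obj, stm_private_rev_num, GCFLAG_WRITE_BARRIER):
        # write barrier: take the slow path if the object is not private
        # OR its GCFLAG_WRITE_BARRIER flag is still set
        return (obj.h_revision != stm_private_rev_num
                or (obj.h_tid & GCFLAG_WRITE_BARRIER) != 0)
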
diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -2182,12 +2182,19 @@
assert isinstance(result_loc, RegLoc)
mc.POP_r(result_loc.value)
- def _get_private_rev_num_addr(self):
+ def _get_stm_private_rev_num_addr(self):
assert self.cpu.gc_ll_descr.stm
rn = rstm.get_adr_of_private_rev_num()
rn = rn - stmtlocal.threadlocal_base()
assert rx86.fits_in_32bits(rn)
return rn
+
+ def _get_stm_read_barrier_cache_addr(self):
+ assert self.cpu.gc_ll_descr.stm
+ rbc = rstm.get_adr_of_read_barrier_cache()
+ rbc = rbc - stmtlocal.threadlocal_base()
+ assert rx86.fits_in_32bits(rbc)
+ return rbc
def _stm_barrier_fastpath(self, mc, descr, arglocs, is_frame=False,
align_stack=False):
@@ -2207,18 +2214,82 @@
#
# FASTPATH:
#
- rn = self._get_private_rev_num_addr()
+ # write_barrier:
+ # (obj->h_revision != stm_private_rev_num)
+ # || (obj->h_tid & GCFLAG_WRITE_BARRIER) != 0)
+ # read_barrier:
+ # (obj->h_revision != stm_private_rev_num)
+ # && (FXCACHE_AT(obj) != obj)))
+ assert not IS_X86_32 # XXX: todo
+ jz_location = 0
+ jz_location2 = 0
+ jnz_location = 0
+ # compare h_revision with stm_private_rev_num (XXX: may be slow)
+ rn = self._get_stm_private_rev_num_addr()
+ stmtlocal.tl_segment_prefix(mc)
+ mc.MOV_rj(X86_64_SCRATCH_REG.value, rn)
+ if loc_base == ebp:
+ mc.CMP_rb(X86_64_SCRATCH_REG.value, StmGC.H_REVISION)
+ else:
+ mc.CMP(X86_64_SCRATCH_REG, mem(loc_base, StmGC.H_REVISION))
+
if isinstance(descr, STMReadBarrierDescr):
- # (obj->h_revision != stm_private_rev_num)
- # && (FXCACHE_AT(obj) != obj)))
- stmtlocal.tl_segment_prefix(mc)
- #mc.CMP_jr(rn, loc_base.value)
- mc.MOV_rj(X86_64_SCRATCH_REG.value, rn)
- mc.CMP(X86_64_SCRATCH_REG, mem(loc_base, StmGC.H_REVISION))
+ # jump to end if h_rev==priv_rev
mc.J_il8(rx86.Conditions['Z'], 0) # patched below
jz_location = mc.get_relative_pos()
- else:
- jz_location = 0
+ else: # write_barrier
+ # jump to slowpath if h_rev!=priv_rev
+ mc.J_il8(rx86.Conditions['NZ'], 0) # patched below
+ jnz_location = mc.get_relative_pos()
+
+ if isinstance(descr, STMReadBarrierDescr):
+ # FXCACHE_AT(obj) != obj
+ # XXX: optimize...
+ temp = loc_base.find_unused_reg()
+ mc.PUSH_r(temp.value)
+ mc.MOV_rr(temp.value, loc_base.value)
+ mc.AND_ri(temp.value, StmGC.FX_MASK)
+
+ # XXX: addressings like [rdx+rax*1] don't seem to work
+ rbc = self._get_stm_read_barrier_cache_addr()
+ stmtlocal.tl_segment_prefix(mc)
+ mc.MOV_rj(X86_64_SCRATCH_REG.value, rbc)
+ mc.ADD_rr(X86_64_SCRATCH_REG.value, temp.value)
+ mc.CMP(loc_base, mem(X86_64_SCRATCH_REG, 0))
+ mc.POP_r(temp.value)
+ mc.J_il8(rx86.Conditions['Z'], 0) # patched below
+ jz_location2 = mc.get_relative_pos()
+ # <stm_read_barrier+21>: mov rdx,0xffffffffffffffb0
+ # <stm_read_barrier+28>: movzx eax,di
+ # <stm_read_barrier+31>: mov rdx,QWORD PTR fs:[rdx]
+ # <stm_read_barrier+35>: cmp rdi,QWORD PTR [rdx+rax*1]
+ # <stm_read_barrier+39>: je 0x401f61 <stm_read_barrier+17>
+ # <stm_read_barrier+41>: jmp 0x6a59f0 <stm_DirectReadBarrier>
+
+ if isinstance(descr, STMWriteBarrierDescr):
+ # obj->h_tid & GCFLAG_WRITE_BARRIER) != 0
+ if loc_base == ebp:
+ #mc.MOV_rb(X86_64_SCRATCH_REG.value, StmGC.H_TID)
+ mc.TEST8_bi(StmGC.H_TID, StmGC.GCFLAG_WRITE_BARRIER)
+ else:
+ # mc.MOV(X86_64_SCRATCH_REG, mem(loc_base, StmGC.H_TID))
+ mc.TEST8_mi((loc_base.value, StmGC.H_TID),
+ StmGC.GCFLAG_WRITE_BARRIER)
+ #doesn't work:
+ # mc.TEST(X86_64_SCRATCH_REG, imm(StmGC.GCFLAG_WRITE_BARRIER))
+ mc.J_il8(rx86.Conditions['NZ'], 0) # patched below
+ jnz_location2 = mc.get_relative_pos()
+
+ # jump to end
+ mc.JMP_l8(0) # patched below
+ jz_location = mc.get_relative_pos()
+
+ # jump target slowpath:
+ offset = mc.get_relative_pos() - jnz_location
+ offset2 = mc.get_relative_pos() - jnz_location2
+ assert 0 < offset <= 127
+ mc.overwrite(jnz_location - 1, chr(offset))
+ mc.overwrite(jnz_location2 - 1, chr(offset2))
#
# SLOWPATH_START
#
@@ -2243,10 +2314,14 @@
#
# SLOWPATH_END
#
+ # jump target end:
+ offset = mc.get_relative_pos() - jz_location
+ assert 0 < offset <= 127
+ mc.overwrite(jz_location - 1, chr(offset))
if isinstance(descr, STMReadBarrierDescr):
- offset = mc.get_relative_pos() - jz_location
+ offset = mc.get_relative_pos() - jz_location2
assert 0 < offset <= 127
- mc.overwrite(jz_location - 1, chr(offset))
+ mc.overwrite(jz_location2 - 1, chr(offset))
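
The hunk above relies on the backend's usual forward-patching idiom: emit a
short conditional jump with a placeholder displacement of 0, remember
mc.get_relative_pos(), and once the target is known overwrite the single
displacement byte just before that position. A minimal self-contained sketch
of the pattern (ToyCodeBuffer is a stand-in for the real MachineCodeBuilder,
not the backend's actual API):

    class ToyCodeBuffer(object):
        def __init__(self):
            self.code = []
        def emit(self, *byte_vals):
            self.code.extend(byte_vals)
        def get_relative_pos(self):
            return len(self.code)
        def overwrite(self, pos, char):
            self.code[pos] = ord(char)

    mc = ToyCodeBuffer()
    mc.emit(0x74, 0x00)            # short JZ, displacement left as 0 for now
    jz_location = mc.get_relative_pos()
    mc.emit(0x90, 0x90, 0x90)      # ...the instructions the jump should skip...
    offset = mc.get_relative_pos() - jz_location
    assert 0 < offset <= 127       # must still fit a signed 8-bit displacement
    mc.overwrite(jz_location - 1, chr(offset))
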
diff --git a/rpython/memory/gc/stmgc.py b/rpython/memory/gc/stmgc.py
--- a/rpython/memory/gc/stmgc.py
+++ b/rpython/memory/gc/stmgc.py
@@ -12,23 +12,7 @@
WORD = LONG_BIT // 8
NULL = llmemory.NULL
-
-# keep in sync with stmgc.h & et.h:
first_gcflag = 1 << (LONG_BIT//2)
-GCFLAG_OLD = first_gcflag << 0
-GCFLAG_VISITED = first_gcflag << 1
-GCFLAG_PUBLIC = first_gcflag << 2
-GCFLAG_PREBUILT_ORIGINAL = first_gcflag << 3
-GCFLAG_PUBLIC_TO_PRIVATE = first_gcflag << 4
-GCFLAG_WRITE_BARRIER = first_gcflag << 5 # stmgc.h
-GCFLAG_NURSERY_MOVED = first_gcflag << 6
-GCFLAG_BACKUP_COPY = first_gcflag << 7 # debug
-GCFLAG_STUB = first_gcflag << 8 # debug
-GCFLAG_PRIVATE_FROM_PROTECTED = first_gcflag << 9
-GCFLAG_HAS_ID = first_gcflag << 10
-
-PREBUILT_FLAGS = first_gcflag * (1 + 2 + 4 + 8)
-PREBUILT_REVISION = r_uint(1)
class StmGC(MovingGCBase):
@@ -53,6 +37,27 @@
TRANSLATION_PARAMS = {
}
+ # keep in sync with stmgc.h & et.h:
+ GCFLAG_OLD = first_gcflag << 0
+ GCFLAG_VISITED = first_gcflag << 1
+ GCFLAG_PUBLIC = first_gcflag << 2
+ GCFLAG_PREBUILT_ORIGINAL = first_gcflag << 3
+ GCFLAG_PUBLIC_TO_PRIVATE = first_gcflag << 4
+ GCFLAG_WRITE_BARRIER = first_gcflag << 5 # stmgc.h
+ GCFLAG_NURSERY_MOVED = first_gcflag << 6
+ GCFLAG_BACKUP_COPY = first_gcflag << 7 # debug
+ GCFLAG_STUB = first_gcflag << 8 # debug
+ GCFLAG_PRIVATE_FROM_PROTECTED = first_gcflag << 9
+ GCFLAG_HAS_ID = first_gcflag << 10
+ GCFLAG_IMMUTABLE = first_gcflag << 11
+ GCFLAG_SMALLSTUB = first_gcflag << 12
+
+ PREBUILT_FLAGS = first_gcflag * (1 + 2 + 4 + 8)
+ PREBUILT_REVISION = r_uint(1)
+
+ FX_MASK = 65535
+
+
def setup(self):
# Hack: MovingGCBase.setup() sets up stuff related to id(), which
# we implement differently anyway. So directly call GCBase.setup().
@@ -75,7 +80,7 @@
def get_original_copy(self, obj):
addr = llmemory.cast_ptr_to_adr(obj)
- if bool(self.get_hdr_tid(addr)[0] & GCFLAG_PREBUILT_ORIGINAL):
+ if bool(self.get_hdr_tid(addr)[0] & self.GCFLAG_PREBUILT_ORIGINAL):
return obj
#
orig = self.get_hdr_original(addr)[0]
@@ -125,7 +130,7 @@
"""Means the reference will stay valid, except if not
seen by the GC, then it can get collected."""
tid = self.get_hdr_tid(obj)[0]
- if bool(tid & GCFLAG_OLD):
+ if bool(tid & self.GCFLAG_OLD):
return False
return True
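
The stmgc.py hunk above mainly moves the GCFLAG_* constants and the
PREBUILT_* values from module level onto the StmGC class, so callers such as
the x86 assembler can spell them StmGC.GCFLAG_WRITE_BARRIER / StmGC.FX_MASK
instead of importing loose module globals. Purely as an illustration
(assuming a 64-bit build where LONG_BIT is 64), the values the JIT fast path
uses come out like this:

    # Illustration only, assuming LONG_BIT == 64 (64-bit target).
    LONG_BIT = 64
    first_gcflag = 1 << (LONG_BIT // 2)       # 1 << 32
    GCFLAG_WRITE_BARRIER = first_gcflag << 5  # tested by the write-barrier fast path
    FX_MASK = 65535                           # low 16 bits of the object address
                                              # select the read-barrier cache slot
    assert GCFLAG_WRITE_BARRIER == 1 << 37
    assert FX_MASK == (1 << 16) - 1
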
diff --git a/rpython/rlib/rstm.py b/rpython/rlib/rstm.py
--- a/rpython/rlib/rstm.py
+++ b/rpython/rlib/rstm.py
@@ -7,6 +7,10 @@
addr = llop.stm_get_adr_of_private_rev_num(llmemory.Address)
return rffi.cast(lltype.Signed, addr)
+def get_adr_of_read_barrier_cache():
+ addr = llop.stm_get_adr_of_read_barrier_cache(llmemory.Address)
+ return rffi.cast(lltype.Signed, addr)
+
def become_inevitable():
llop.stm_become_inevitable(lltype.Void)
diff --git a/rpython/rtyper/lltypesystem/lloperation.py b/rpython/rtyper/lltypesystem/lloperation.py
--- a/rpython/rtyper/lltypesystem/lloperation.py
+++ b/rpython/rtyper/lltypesystem/lloperation.py
@@ -457,6 +457,7 @@
'stm_inspect_abort_info': LLOp(sideeffects=False),
'stm_get_adr_of_private_rev_num':LLOp(),
+ 'stm_get_adr_of_read_barrier_cache':LLOp(),
# __________ address operations __________
diff --git a/rpython/translator/c/funcgen.py b/rpython/translator/c/funcgen.py
--- a/rpython/translator/c/funcgen.py
+++ b/rpython/translator/c/funcgen.py
@@ -593,6 +593,7 @@
OP_STM_POP_ROOT_INTO = _OP_STM
OP_STM_GET_ROOT_STACK_TOP = _OP_STM
OP_STM_GET_ADR_OF_PRIVATE_REV_NUM = _OP_STM
+ OP_STM_GET_ADR_OF_READ_BARRIER_CACHE = _OP_STM
OP_STM_ALLOCATE = _OP_STM
OP_STM_WEAKREF_ALLOCATE = _OP_STM
OP_STM_GET_TID = _OP_STM
diff --git a/rpython/translator/stm/funcgen.py b/rpython/translator/stm/funcgen.py
--- a/rpython/translator/stm/funcgen.py
+++ b/rpython/translator/stm/funcgen.py
@@ -103,6 +103,11 @@
result = funcgen.expr(op.result)
return '%s = (%s)&stm_private_rev_num;' % (
result, cdecl(funcgen.lltypename(op.result), ''))
+
+def stm_get_adr_of_read_barrier_cache(funcgen, op):
+ result = funcgen.expr(op.result)
+ return '%s = (%s)&stm_read_barrier_cache;' % (
+ result, cdecl(funcgen.lltypename(op.result), ''))
def stm_weakref_allocate(funcgen, op):
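
Taken together, the remaining hunks plumb one new operation through the
translator: rstm.get_adr_of_read_barrier_cache() emits the llop,
lloperation.py registers 'stm_get_adr_of_read_barrier_cache', c/funcgen.py
dispatches it through _OP_STM, and translator/stm/funcgen.py renders it as C.
As a rough illustration of the generated C (the exact type text comes from
cdecl()/lltypename() and may differ), for a hypothetical Signed result
variable r0:

    # Illustration only: roughly what the new funcgen hook renders,
    # for a hypothetical result variable r0 of C type "long".
    result, ctype = 'r0', 'long'
    print '%s = (%s)&stm_read_barrier_cache;' % (result, ctype)
    # prints: r0 = (long)&stm_read_barrier_cache;
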