Author: Remi Meier <[email protected]>
Branch: stmgc-c4
Changeset: r67490:2da83847b301
Date: 2013-10-21 14:16 +0200
http://bitbucket.org/pypy/pypy/changeset/2da83847b301/
Log: use new thread-locals for better nursery-fastpath
diff --git a/rpython/jit/backend/llsupport/rewrite.py b/rpython/jit/backend/llsupport/rewrite.py
--- a/rpython/jit/backend/llsupport/rewrite.py
+++ b/rpython/jit/backend/llsupport/rewrite.py
@@ -161,7 +161,7 @@
def gen_malloc_frame(self, frame_info, frame):
size_box = history.BoxInt()
descrs = self.gc_ll_descr.getframedescrs(self.cpu)
- if self.gc_ll_descr.kind == 'boehm' or self.gc_ll_descr.stm:
+ if self.gc_ll_descr.kind == 'boehm':
op0 = ResOperation(rop.GETFIELD_GC, [history.ConstInt(frame_info)],
size_box,
descr=descrs.jfi_frame_depth)
@@ -171,7 +171,6 @@
self.handle_new_array(descrs.arraydescr, op1)
else:
# we read size in bytes here, not the length
- # (this path is only used in non-STM mode)
op0 = ResOperation(rop.GETFIELD_GC, [history.ConstInt(frame_info)],
size_box,
descr=descrs.jfi_frame_size)
diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -259,10 +259,9 @@
self._pop_all_regs_from_frame(mc, [eax, edi], self.cpu.supports_floats)
if self.cpu.gc_ll_descr.stm:
# load nursery_current into EDI
- self._load_stm_thread_descriptor(mc, X86_64_SCRATCH_REG)
- mc.MOV_rm(edi.value,
- (X86_64_SCRATCH_REG.value,
- StmGC.TD_NURSERY_CURRENT))
+ nc = self._get_stm_tl(rstm.get_nursery_current_adr())
+ self._tl_segment_if_stm(mc)
+ mc.MOV_rj(edi.value, nc)
else:
nursery_free_adr = self.cpu.gc_ll_descr.get_nursery_free_addr()
mc.MOV(edi, heap(nursery_free_adr)) # load this in EDI
@@ -2755,25 +2754,16 @@
# XXX if the next operation is a GUARD_NO_EXCEPTION, we should
# somehow jump over it too in the fast path
- def _load_stm_thread_descriptor(self, mc, loc):
- assert self.cpu.gc_ll_descr.stm
- assert isinstance(loc, RegLoc)
-
- td = self._get_stm_tl(rstm.get_thread_descriptor_adr())
- self._tl_segment_if_stm(mc)
- mc.MOV(loc, heap(td))
- mc.MOV_rm(loc.value, (loc.value, 0))
-
+
def _cond_allocate_in_nursery_or_slowpath(self, mc, gcmap):
# needed for slowpath:
# eax = nursery_current
# edi = nursery_current + size
- # needed here:
- # X86_64_SCRATCH_REG = thread_descriptor
#
# cmp nursery_current+size > nursery_nextlimit
- mc.CMP_rm(edi.value, (X86_64_SCRATCH_REG.value,
- StmGC.TD_NURSERY_NEXTLIMIT))
+ nnl = self._get_stm_tl(rstm.get_nursery_nextlimit_adr())
+ self._tl_segment_if_stm(mc)
+ mc.CMP_rj(edi.value, nnl)
mc.J_il8(rx86.Conditions['NA'], 0) # patched later
jmp_adr = mc.get_relative_pos()
#
@@ -2781,7 +2771,7 @@
# save the gcmap
self.push_gcmap(mc, gcmap, mov=True)
mc.CALL(imm(self.malloc_slowpath))
- mc.JMP_l8(0)
+ mc.JMP_l8(0) # XXX: is JMP over 1 instr good?
jmp2_adr = mc.get_relative_pos()
#
# == FASTPATH ==
@@ -2789,10 +2779,10 @@
assert 0 < offset <= 127
mc.overwrite(jmp_adr-1, chr(offset))
#
- # thread_descriptor->nursery_current = nursery_current+size
- mc.MOV_mr((X86_64_SCRATCH_REG.value,
- StmGC.TD_NURSERY_CURRENT),
- edi.value)
+ # stm_nursery_current = stm_nursery_current+size
+ nc = self._get_stm_tl(rstm.get_nursery_current_adr())
+ self._tl_segment_if_stm(mc)
+ mc.MOV_jr(nc, edi.value)
#
# END
offset = mc.get_relative_pos() - jmp2_adr
@@ -2804,10 +2794,10 @@
assert size & (WORD-1) == 0 # must be correctly aligned
mc = self.mc
# load nursery_current and nursery_nextlimit
- self._load_stm_thread_descriptor(mc, X86_64_SCRATCH_REG)
- mc.MOV_rm(eax.value,
- (X86_64_SCRATCH_REG.value,
- StmGC.TD_NURSERY_CURRENT))
+ nc = self._get_stm_tl(rstm.get_nursery_current_adr())
+ self._tl_segment_if_stm(mc)
+ mc.MOV_rj(eax.value, nc)
+ #
mc.LEA_rm(edi.value, (eax.value, size))
#
# eax=nursery_current, edi=nursery_current+size
@@ -2816,12 +2806,14 @@
def malloc_cond_varsize_frame_stm(self, sizeloc, gcmap):
assert self.cpu.gc_ll_descr.stm
mc = self.mc
- self._load_stm_thread_descriptor(mc, X86_64_SCRATCH_REG)
if sizeloc is eax:
self.mc.MOV(edi, sizeloc)
sizeloc = edi
- self.mc.MOV_rm(eax.value, (X86_64_SCRATCH_REG.value,
- StmGC.TD_NURSERY_CURRENT))
+
+ nc = self._get_stm_tl(rstm.get_nursery_current_adr())
+ self._tl_segment_if_stm(mc)
+ mc.MOV_rj(eax.value, nc)
+
if sizeloc is edi:
self.mc.ADD_rr(edi.value, eax.value)
else:
@@ -2837,6 +2829,9 @@
assert isinstance(arraydescr, ArrayDescr)
mc = self.mc
+ nc = self._get_stm_tl(rstm.get_nursery_current_adr())
+ nnl = self._get_stm_tl(rstm.get_nursery_nextlimit_adr())
+
# lengthloc is the length of the array, which we must not modify!
assert lengthloc is not eax and lengthloc is not edi
if isinstance(lengthloc, RegLoc):
@@ -2849,10 +2844,8 @@
mc.J_il8(rx86.Conditions['A'], 0) # patched later
jmp_adr0 = mc.get_relative_pos()
- self._load_stm_thread_descriptor(mc, X86_64_SCRATCH_REG)
- mc.MOV_rm(eax.value,
- (X86_64_SCRATCH_REG.value,
- StmGC.TD_NURSERY_CURRENT))
+ self._tl_segment_if_stm(mc)
+ mc.MOV_rj(eax.value, nc)
if valid_addressing_size(itemsize):
shift = get_scale(itemsize)
@@ -2873,8 +2866,8 @@
mc.AND_ri(edi.value, ~(WORD - 1))
# now edi contains the total size in bytes, rounded up to a multiple
# of WORD, plus nursery_free_adr
- mc.CMP_rm(edi.value, (X86_64_SCRATCH_REG.value,
- StmGC.TD_NURSERY_NEXTLIMIT))
+ self._tl_segment_if_stm(mc)
+ mc.CMP_rj(edi.value, nnl)
mc.J_il8(rx86.Conditions['NA'], 0) # patched later
jmp_adr1 = mc.get_relative_pos()
#
@@ -2905,10 +2898,9 @@
assert 0 < offset <= 127
mc.overwrite(jmp_adr1-1, chr(offset))
#
- # set thread_descriptor->nursery_current
- mc.MOV_mr((X86_64_SCRATCH_REG.value,
- StmGC.TD_NURSERY_CURRENT),
- edi.value)
+ # set stm_nursery_current
+ self._tl_segment_if_stm(mc)
+ mc.MOV_jr(nc, edi.value)
#
# write down the tid
mc.MOV(mem(eax, 0), imm(arraydescr.tid))
diff --git a/rpython/memory/gc/stmgc.py b/rpython/memory/gc/stmgc.py
--- a/rpython/memory/gc/stmgc.py
+++ b/rpython/memory/gc/stmgc.py
@@ -37,10 +37,6 @@
malloc_zero_filled = True
#gcflag_extra = GCFLAG_EXTRA
- # SYNC with et.h
- TD_NURSERY_CURRENT = 80
- TD_NURSERY_NEXTLIMIT = 88
-
GCHDR = lltype.Struct(
'GCPTR',
('h_tid', lltype.Unsigned),
diff --git a/rpython/rlib/rstm.py b/rpython/rlib/rstm.py
--- a/rpython/rlib/rstm.py
+++ b/rpython/rlib/rstm.py
@@ -5,8 +5,18 @@
from rpython.rlib.jit import dont_look_inside
@dont_look_inside
-def get_thread_descriptor_adr():
- addr = llop.stm_get_adr_of_thread_descriptor(llmemory.Address)
+def get_nursery_current_adr():
+ addr = llop.stm_get_adr_of_nursery_current(llmemory.Address)
+ return rffi.cast(lltype.Signed, addr)
+
+@dont_look_inside
+def get_nursery_nextlimit_adr():
+ addr = llop.stm_get_adr_of_nursery_nextlimit(llmemory.Address)
+ return rffi.cast(lltype.Signed, addr)
+
+@dont_look_inside
+def get_active_adr():
+ addr = llop.stm_get_adr_of_active(llmemory.Address)
return rffi.cast(lltype.Signed, addr)
@dont_look_inside
diff --git a/rpython/rtyper/llinterp.py b/rpython/rtyper/llinterp.py
--- a/rpython/rtyper/llinterp.py
+++ b/rpython/rtyper/llinterp.py
@@ -951,7 +951,9 @@
op_stm_barrier = _stm_not_implemented
op_stm_push_root = _stm_not_implemented
op_stm_pop_root_into = _stm_not_implemented
- op_stm_get_adr_of_thread_descriptor = _stm_not_implemented
+ op_stm_get_adr_of_nursery_current = _stm_not_implemented
+ op_stm_get_adr_of_nursery_nextlimit = _stm_not_implemented
+ op_stm_get_adr_of_active = _stm_not_implemented
op_stm_get_adr_of_read_barrier_cache = _stm_not_implemented
op_stm_get_adr_of_private_rev_num = _stm_not_implemented
op_stm_enter_callback_call = _stm_not_implemented
diff --git a/rpython/rtyper/lltypesystem/lloperation.py b/rpython/rtyper/lltypesystem/lloperation.py
--- a/rpython/rtyper/lltypesystem/lloperation.py
+++ b/rpython/rtyper/lltypesystem/lloperation.py
@@ -448,7 +448,9 @@
'stm_get_adr_of_private_rev_num':LLOp(),
'stm_get_adr_of_read_barrier_cache':LLOp(),
- 'stm_get_adr_of_thread_descriptor': LLOp(),
+ 'stm_get_adr_of_nursery_current': LLOp(),
+ 'stm_get_adr_of_nursery_nextlimit': LLOp(),
+ 'stm_get_adr_of_active': LLOp(),
'stm_ignored_start': LLOp(canrun=True),
'stm_ignored_stop': LLOp(canrun=True),
diff --git a/rpython/translator/c/funcgen.py b/rpython/translator/c/funcgen.py
--- a/rpython/translator/c/funcgen.py
+++ b/rpython/translator/c/funcgen.py
@@ -590,7 +590,9 @@
OP_STM_PTR_EQ = _OP_STM
OP_STM_PUSH_ROOT = _OP_STM
OP_STM_POP_ROOT_INTO = _OP_STM
- OP_STM_GET_ADR_OF_THREAD_DESCRIPTOR = _OP_STM
+ OP_STM_GET_ADR_OF_NURSERY_CURRENT = _OP_STM
+ OP_STM_GET_ADR_OF_NURSERY_NEXTLIMIT = _OP_STM
+ OP_STM_GET_ADR_OF_ACTIVE = _OP_STM
OP_STM_GET_ROOT_STACK_TOP = _OP_STM
OP_STM_GET_ADR_OF_PRIVATE_REV_NUM = _OP_STM
OP_STM_GET_ADR_OF_READ_BARRIER_CACHE= _OP_STM
diff --git a/rpython/translator/stm/funcgen.py b/rpython/translator/stm/funcgen.py
--- a/rpython/translator/stm/funcgen.py
+++ b/rpython/translator/stm/funcgen.py
@@ -114,9 +114,19 @@
return '%s = (%s)stm_pop_root();' % (
arg0, cdecl(funcgen.lltypename(op.args[0]), ''))
-def stm_get_adr_of_thread_descriptor(funcgen, op):
+def stm_get_adr_of_nursery_current(funcgen, op):
result = funcgen.expr(op.result)
- return '%s = (%s)&thread_descriptor;' % (
+ return '%s = (%s)&stm_nursery_current;' % (
+ result, cdecl(funcgen.lltypename(op.result), ''))
+
+def stm_get_adr_of_nursery_nextlimit(funcgen, op):
+ result = funcgen.expr(op.result)
+ return '%s = (%s)&stm_nursery_nextlimit;' % (
+ result, cdecl(funcgen.lltypename(op.result), ''))
+
+def stm_get_adr_of_active(funcgen, op):
+ result = funcgen.expr(op.result)
+ return '%s = (%s)&stm_active;' % (
result, cdecl(funcgen.lltypename(op.result), ''))
def stm_get_root_stack_top(funcgen, op):
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit