Author: Armin Rigo <[email protected]>
Branch: stmgc-c7
Changeset: r70168:c851d6840147
Date: 2014-03-22 17:15 +0100
http://bitbucket.org/pypy/pypy/changeset/c851d6840147/
Log: in-progress
diff --git a/rpython/jit/backend/llsupport/gc.py b/rpython/jit/backend/llsupport/gc.py
--- a/rpython/jit/backend/llsupport/gc.py
+++ b/rpython/jit/backend/llsupport/gc.py
@@ -564,12 +564,20 @@
self.for_test_only.x = x0 + x1 + x2 + x3
def get_nursery_free_addr(self):
- nurs_addr = llop.gc_adr_of_nursery_free(llmemory.Address)
- return rffi.cast(lltype.Signed, nurs_addr)
+ if self.stm:
+ from rpython.rlib import rstm
+ return rstm.adr_nursery_free
+ else:
+ nurs_addr = llop.gc_adr_of_nursery_free(llmemory.Address)
+ return rffi.cast(lltype.Signed, nurs_addr)
def get_nursery_top_addr(self):
- nurs_top_addr = llop.gc_adr_of_nursery_top(llmemory.Address)
- return rffi.cast(lltype.Signed, nurs_top_addr)
+ if self.stm:
+ from rpython.rlib import rstm
+ return rstm.adr_nursery_top
+ else:
+ nurs_top_addr = llop.gc_adr_of_nursery_top(llmemory.Address)
+ return rffi.cast(lltype.Signed, nurs_top_addr)
def initialize(self):
pass
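
Under STM the nursery pointers live in the per-segment STM_SEGMENT structure, so get_nursery_free_addr() cannot return a plain process address there: it returns a %gs-relative offset (a CDefinedIntSymbolic, see the rstm.py hunk below) that is only meaningful behind the SEGC7() prefix. A runnable toy of the dispatch, with hypothetical stand-in values:

    class ToyGcLLDescr:
        # 0x40 stands in for rstm.adr_nursery_free (a %gs-relative
        # offset); 0x1000 stands in for the address from the llop
        def __init__(self, stm):
            self.stm = stm
        def get_nursery_free_addr(self):
            if self.stm:
                return 0x40      # usable only behind mc.SEGC7()
            return 0x1000        # ordinary absolute address

    assert ToyGcLLDescr(stm=True).get_nursery_free_addr() == 0x40
    assert ToyGcLLDescr(stm=False).get_nursery_free_addr() == 0x1000
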
diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -73,7 +73,7 @@
if WORD == 8:
self.pending_memoryerror_trampoline_from = []
self.error_trampoline_64 = 0
- self.mc = codebuf.MachineCodeBlockWrapper()
+ self.mc = codebuf.MachineCodeBlockWrapper(self.cpu)
#assert self.datablockwrapper is None --- but obscure case
# possible, e.g. getting MemoryError and continuing
allblocks = self.get_asmmemmgr_blocks(looptoken)
@@ -112,7 +112,7 @@
mc.MOV_bi(extra_ofs, value)
def build_frame_realloc_slowpath(self):
- mc = codebuf.MachineCodeBlockWrapper()
+ mc = codebuf.MachineCodeBlockWrapper(self.cpu)
self._push_all_regs_to_frame(mc, [], self.cpu.supports_floats)
# this is the gcmap stored by push_gcmap(mov=True) in _check_stack_frame
mc.MOV_rs(ecx.value, WORD)
@@ -156,7 +156,7 @@
""" This builds a general call slowpath, for whatever call happens to
come.
"""
- mc = codebuf.MachineCodeBlockWrapper()
+ mc = codebuf.MachineCodeBlockWrapper(self.cpu)
# copy registers to the frame, with the exception of the
# 'cond_call_register_arguments' and eax, because these have already
# been saved by the caller. Note that this is not symmetrical:
@@ -196,7 +196,7 @@
This function does not have to preserve registers. It expects
all registers to be saved in the caller.
"""
- mc = codebuf.MachineCodeBlockWrapper()
+ mc = codebuf.MachineCodeBlockWrapper(self.cpu)
# store the gc pattern
ofs = self.cpu.get_ofs_of_frame_field('jf_gcmap')
mc.MOV_rs(ecx.value, WORD)
@@ -250,7 +250,7 @@
This function must preserve all registers apart from eax and edi.
"""
assert kind in ['fixed', 'str', 'unicode', 'var']
- mc = codebuf.MachineCodeBlockWrapper()
+ mc = codebuf.MachineCodeBlockWrapper(self.cpu)
self._push_all_regs_to_frame(mc, [eax, edi], self.cpu.supports_floats)
# store the gc pattern
ofs = self.cpu.get_ofs_of_frame_field('jf_gcmap')
@@ -305,14 +305,9 @@
self._reload_frame_if_necessary(mc, align_stack=True)
self.set_extra_stack_depth(mc, 0)
self._pop_all_regs_from_frame(mc, [eax, edi], self.cpu.supports_floats)
- if self.cpu.gc_ll_descr.stm:
- # load nursery_current into EDI
- nc = self._get_stm_tl(rstm.get_nursery_current_adr())
- self._tl_segment_if_stm(mc)
- mc.MOV_rj(edi.value, nc)
- else:
- nursery_free_adr = self.cpu.gc_ll_descr.get_nursery_free_addr()
- mc.MOV(edi, heap(nursery_free_adr)) # load this in EDI
+ nursery_free_adr = self.cpu.gc_ll_descr.get_nursery_free_addr()
+ mc.SEGC7()
+ mc.MOV(edi, heap(nursery_free_adr)) # load this in EDI
# clear the gc pattern
mc.MOV_bi(ofs, 0)
mc.RET()
@@ -333,7 +328,7 @@
if not self.cpu.propagate_exception_descr:
return # not supported (for tests, or non-translated)
#
- self.mc = codebuf.MachineCodeBlockWrapper()
+ self.mc = codebuf.MachineCodeBlockWrapper(self.cpu)
#
# read and reset the current exception
@@ -350,22 +345,6 @@
self.propagate_exception_path = rawstart
self.mc = None
- def _get_stm_tl(self, adr):
- """Makes 'adr' relative to threadlocal-base if we run in STM.
- Before using such a relative address, call _tl_segment_if_stm()."""
- if self.cpu.gc_ll_descr.stm and we_are_translated():
- # only for STM and not during tests
- result = adr - stmtlocal.threadlocal_base()
- assert rx86.fits_in_32bits(result)
- return result
- return adr
-
- def _tl_segment_if_stm(self, mc):
- """Insert segment prefix for thread-local memory if we run
- in STM and not during testing."""
- if self.cpu.gc_ll_descr.stm and we_are_translated():
- stmtlocal.tl_segment_prefix(mc)
-
def _build_stack_check_slowpath(self):
if self.cpu.gc_ll_descr.stm:
return # XXX no stack check on STM for now
@@ -381,7 +360,7 @@
# | my own retaddr | <-- esp
# +---------------------+
#
- mc = codebuf.MachineCodeBlockWrapper()
+ mc = codebuf.MachineCodeBlockWrapper(self.cpu)
#
if IS_X86_64:
# on the x86_64, we have to save all the registers that may
@@ -402,8 +381,8 @@
else:
mc.ADD_ri(esp.value, WORD)
#
- ea = self._get_stm_tl(self.cpu.pos_exception())
- self._tl_segment_if_stm(mc)
+ ea = mc.in_tl_segment(self.cpu.pos_exception())
+ mc.SEGTL()
mc.MOV(eax, heap(ea))
mc.TEST_rr(eax.value, eax.value)
mc.J_il8(rx86.Conditions['NZ'], 0)
@@ -441,7 +420,7 @@
# all XMM registers. It takes a single argument just pushed
# on the stack even on X86_64. It must restore stack alignment
# accordingly.
- mc = codebuf.MachineCodeBlockWrapper()
+ mc = codebuf.MachineCodeBlockWrapper(self.cpu)
#
if not for_frame:
self._push_all_regs_to_frame(mc, [], withfloats, callee_only=True)
@@ -518,7 +497,7 @@
#
# make the stm_longjmp_callback() function, with signature
# void (*longjmp_callback)(void *stm_resume_buffer)
- mc = codebuf.MachineCodeBlockWrapper()
+ mc = codebuf.MachineCodeBlockWrapper(self.cpu)
#
# 'edi' contains the stm resume buffer, so the new stack
# location that we have to enforce is 'edi - FRAME_FIXED_SIZE * WORD'.
@@ -529,9 +508,9 @@
#
# restore the shadowstack pointer from stm_resume_buffer[1]
gcrootmap = self.cpu.gc_ll_descr.gcrootmap
- rst = self._get_stm_tl(gcrootmap.get_root_stack_top_addr())
+ rst = mc.in_tl_segment(gcrootmap.get_root_stack_top_addr())
mc.MOV_rs(eax.value, (FRAME_FIXED_SIZE + 1) * WORD)
- self._tl_segment_if_stm(mc)
+ mc.SEGTL()
mc.MOV_jr(rst, eax.value)
#
# must restore 'ebp' from its saved value in the shadowstack
@@ -702,7 +681,7 @@
assert rx86.fits_in_32bits(relative_target)
#
if not tok.is_guard_not_invalidated:
- mc = codebuf.MachineCodeBlockWrapper()
+ mc = codebuf.MachineCodeBlockWrapper(self.cpu)
mc.writeimm32(relative_target)
mc.copy_to_raw_memory(addr)
else:
@@ -727,7 +706,7 @@
if WORD == 8:
for pos_after_jz in self.pending_memoryerror_trampoline_from:
assert self.error_trampoline_64 != 0 # only if non-empty
- mc = codebuf.MachineCodeBlockWrapper()
+ mc = codebuf.MachineCodeBlockWrapper(self.cpu)
mc.writeimm32(self.error_trampoline_64 - pos_after_jz)
mc.copy_to_raw_memory(rawstart + pos_after_jz - 4)
@@ -786,7 +765,7 @@
self.frame_depth_to_patch.append(ofs2)
def _patch_frame_depth(self, adr, allocated_depth):
- mc = codebuf.MachineCodeBlockWrapper()
+ mc = codebuf.MachineCodeBlockWrapper(self.cpu)
mc.writeimm32(allocated_depth)
mc.copy_to_raw_memory(adr)
@@ -811,7 +790,7 @@
# that. Otherwise, leave the original rel32 to the recovery stub in
# place, but clobber the recovery stub with a jump to the real
# target.
- mc = codebuf.MachineCodeBlockWrapper()
+ mc = codebuf.MachineCodeBlockWrapper(self.cpu)
if rx86.fits_in_32bits(offset):
mc.writeimm32(offset)
mc.copy_to_raw_memory(adr_jump_offset)
@@ -894,7 +873,7 @@
fn = stmtlocal.stm_invalidate_jmp_buf_fn
self.mc.CALL(imm(self.cpu.cast_ptr_to_int(fn)))
# there could have been a collection in invalidate_jmp_buf()
- self._reload_frame_if_necessary(self.mc)
+ self._reload_frame_if_necessary(self.mc, wb=False)
# the return value is the jitframe
self.mc.MOV_rr(eax.value, ebp.value)
@@ -916,9 +895,9 @@
that gives the address of the stack top. If this integer doesn't
fit in 32 bits, it will be loaded in r11.
"""
- rst = self._get_stm_tl(gcrootmap.get_root_stack_top_addr())
+ rst = mc.in_tl_segment(gcrootmap.get_root_stack_top_addr())
if rx86.fits_in_32bits(rst):
- self._tl_segment_if_stm(mc)
+ mc.SEGTL()
mc.MOV_rj(ebx.value, rst) # MOV ebx, [rootstacktop]
else:
mc.MOV_ri(X86_64_SCRATCH_REG.value, rst) # MOV r11, rootstacktop
@@ -934,9 +913,9 @@
rst = self._load_shadowstack_top_in_ebx(self.mc, gcrootmap)
self.mc.MOV_mr((ebx.value, 0), ebp.value) # MOV [ebx], ebp
self.mc.ADD_ri(ebx.value, WORD)
-
+
if rx86.fits_in_32bits(rst):
- self._tl_segment_if_stm(self.mc)
+ self.mc.SEGTL()
self.mc.MOV_jr(rst, ebx.value) # MOV [rootstacktop], ebx
else:
# The integer 'rst' doesn't fit in 32 bits, so we know that
@@ -946,9 +925,9 @@
ebx.value) # MOV [r11], ebx
def _call_footer_shadowstack(self, gcrootmap):
- rst = self._get_stm_tl(gcrootmap.get_root_stack_top_addr())
+ rst = self.mc.in_tl_segment(gcrootmap.get_root_stack_top_addr())
if rx86.fits_in_32bits(rst):
- self._tl_segment_if_stm(self.mc)
+ self.mc.SEGTL()
self.mc.SUB_ji8(rst, WORD) # SUB [rootstacktop], WORD
else:
self.mc.MOV_ri(ebx.value, rst) # MOV ebx, rootstacktop
@@ -968,7 +947,7 @@
baseofs = self.cpu.get_baseofs_of_frame_field()
newlooptoken.compiled_loop_token.update_frame_info(
oldlooptoken.compiled_loop_token, baseofs)
- mc = codebuf.MachineCodeBlockWrapper()
+ mc = codebuf.MachineCodeBlockWrapper(self.cpu)
mc.JMP(imm(target))
if WORD == 4: # keep in sync with prepare_loop()
assert mc.get_relative_pos() == 5
@@ -1215,25 +1194,17 @@
cb = callbuilder.CallBuilder(self, fnloc, arglocs)
cb.emit_no_collect()
- def _reload_frame_if_necessary(self, mc, align_stack=False):
+ def _reload_frame_if_necessary(self, mc, align_stack=False, wb=True):
gc_ll_descr = self.cpu.gc_ll_descr
gcrootmap = gc_ll_descr.gcrootmap
if gcrootmap and gcrootmap.is_shadow_stack:
- rst = self._get_stm_tl(gcrootmap.get_root_stack_top_addr())
- self._tl_segment_if_stm(mc)
+ rst = mc.in_tl_segment(gcrootmap.get_root_stack_top_addr())
+ mc.SEGTL()
mc.MOV(ecx, heap(rst))
mc.MOV(ebp, mem(ecx, -WORD))
#
- if gcrootmap and gcrootmap.is_stm:
- if not hasattr(gc_ll_descr, 'A2Wdescr'):
- raise Exception("unreachable code")
- wbdescr = gc_ll_descr.A2Wdescr
- self._stm_barrier_fastpath(mc, wbdescr, [ebp], is_frame=True,
- align_stack=align_stack)
- return
- #
wbdescr = gc_ll_descr.write_barrier_descr
- if gcrootmap and wbdescr:
+ if gcrootmap and wbdescr and wb:
# frame never uses card marking, so we enforce this is not
# an array
self._write_barrier_fastpath(mc, wbdescr, [ebp], array=False,
@@ -1745,8 +1716,8 @@
def genop_guard_guard_no_exception(self, ign_1, guard_op, guard_token,
locs, ign_2):
- ea = self._get_stm_tl(self.cpu.pos_exception())
- self._tl_segment_if_stm(self.mc)
+ ea = self.mc.in_tl_segment(self.cpu.pos_exception())
+ self.mc.SEGTL()
self.mc.CMP(heap(ea), imm0)
self.implement_guard(guard_token, 'NZ')
@@ -1760,8 +1731,8 @@
locs, resloc):
loc = locs[0]
loc1 = locs[1]
- ea = self._get_stm_tl(self.cpu.pos_exception())
- self._tl_segment_if_stm(self.mc)
+ ea = self.mc.in_tl_segment(self.cpu.pos_exception())
+ self.mc.SEGTL()
self.mc.MOV(loc1, heap(ea))
self.mc.CMP(loc1, loc)
self.implement_guard(guard_token, 'NE')
@@ -1772,42 +1743,43 @@
""" Resest the exception. If excvalloc is None, then store it on the
frame in jf_guard_exc
"""
- eva = self._get_stm_tl(self.cpu.pos_exc_value())
- ea = self._get_stm_tl(self.cpu.pos_exception())
+ eva = mc.in_tl_segment(self.cpu.pos_exc_value())
+ ea = mc.in_tl_segment(self.cpu.pos_exception())
#
- self._tl_segment_if_stm(mc)
if excvalloc is not None:
assert excvalloc.is_core_reg()
+ mc.SEGTL()
mc.MOV(excvalloc, heap(eva))
elif tmploc is not None: # if both are None, just ignore
ofs = self.cpu.get_ofs_of_frame_field('jf_guard_exc')
+ mc.SEGTL()
mc.MOV(tmploc, heap(eva))
mc.MOV(RawEbpLoc(ofs), tmploc)
#
if exctploc is not None:
assert exctploc.is_core_reg()
- self._tl_segment_if_stm(mc)
+ mc.SEGTL()
mc.MOV(exctploc, heap(ea))
#
- self._tl_segment_if_stm(mc)
+ mc.SEGTL()
mc.MOV(heap(ea), imm0)
- self._tl_segment_if_stm(mc)
+ mc.SEGTL()
mc.MOV(heap(eva), imm0)
def _restore_exception(self, mc, excvalloc, exctploc, tmploc=None):
- eva = self._get_stm_tl(self.cpu.pos_exc_value())
- ea = self._get_stm_tl(self.cpu.pos_exception())
+ eva = mc.in_tl_segment(self.cpu.pos_exc_value())
+ ea = mc.in_tl_segment(self.cpu.pos_exception())
if excvalloc is not None:
- self._tl_segment_if_stm(mc)
+ mc.SEGTL()
mc.MOV(heap(eva), excvalloc)
else:
assert tmploc is not None
ofs = self.cpu.get_ofs_of_frame_field('jf_guard_exc')
mc.MOV(tmploc, RawEbpLoc(ofs))
mc.MOV_bi(ofs, 0)
- self._tl_segment_if_stm(mc)
+ mc.SEGTL()
mc.MOV(heap(eva), tmploc)
- self._tl_segment_if_stm(mc)
+ mc.SEGTL()
mc.MOV(heap(ea), exctploc)
def _gen_guard_overflow(self, guard_op, guard_token):
@@ -2000,20 +1972,20 @@
mc.MOVSD_xb(i, (ofs + i * coeff) * WORD + base_ofs)
def _build_failure_recovery(self, exc, withfloats=False):
- mc = codebuf.MachineCodeBlockWrapper()
+ mc = codebuf.MachineCodeBlockWrapper(self.cpu)
self.mc = mc
self._push_all_regs_to_frame(mc, [], withfloats)
if exc:
# We might have an exception pending. Load it into ebx...
- eva = self._get_stm_tl(self.cpu.pos_exc_value())
- ea = self._get_stm_tl(self.cpu.pos_exception())
- self._tl_segment_if_stm(mc)
+ eva = mc.in_tl_segment(self.cpu.pos_exc_value())
+ ea = mc.in_tl_segment(self.cpu.pos_exception())
+ mc.SEGTL()
mc.MOV(ebx, heap(eva))
- self._tl_segment_if_stm(mc)
+ mc.SEGTL()
mc.MOV(heap(ea), imm0)
- self._tl_segment_if_stm(mc)
+ mc.SEGTL()
mc.MOV(heap(eva), imm0)
# ...and save ebx into 'jf_guard_exc'
offset = self.cpu.get_ofs_of_frame_field('jf_guard_exc')
@@ -2424,46 +2396,13 @@
# XXX if the next operation is a GUARD_NO_EXCEPTION, we should
# somehow jump over it too in the fast path
-
- def _cond_allocate_in_nursery_or_slowpath(self, mc, gcmap):
- # needed for slowpath:
- # eax = nursery_current
- # edi = nursery_current + size
- #
- # cmp nursery_current+size > nursery_nextlimit
- nnl = self._get_stm_tl(rstm.get_nursery_nextlimit_adr())
- self._tl_segment_if_stm(mc)
- mc.CMP_rj(edi.value, nnl)
- mc.J_il8(rx86.Conditions['NA'], 0) # patched later
- jmp_adr = mc.get_relative_pos()
- #
- # == SLOWPATH ==
- # save the gcmap
- self.push_gcmap(mc, gcmap, mov=True)
- mc.CALL(imm(self.malloc_slowpath))
- mc.JMP_l8(0) # XXX: is JMP over 1 instr good?
- jmp2_adr = mc.get_relative_pos()
- #
- # == FASTPATH ==
- offset = mc.get_relative_pos() - jmp_adr
- assert 0 < offset <= 127
- mc.overwrite(jmp_adr-1, chr(offset))
- #
- # stm_nursery_current = stm_nursery_current+size
- nc = self._get_stm_tl(rstm.get_nursery_current_adr())
- self._tl_segment_if_stm(mc)
- mc.MOV_jr(nc, edi.value)
- #
- # END
- offset = mc.get_relative_pos() - jmp2_adr
- assert 0 < offset <= 127
- mc.overwrite(jmp2_adr-1, chr(offset))
def malloc_cond(self, nursery_free_adr, nursery_top_adr, size, gcmap):
- assert not self.cpu.gc_ll_descr.stm
assert size & (WORD-1) == 0 # must be correctly aligned
+ self.mc.SEGC7()
self.mc.MOV(eax, heap(nursery_free_adr))
self.mc.LEA_rm(edi.value, (eax.value, size))
+ self.mc.SEGC7()
self.mc.CMP(edi, heap(nursery_top_adr))
self.mc.J_il8(rx86.Conditions['NA'], 0) # patched later
jmp_adr = self.mc.get_relative_pos()
@@ -2473,6 +2412,7 @@
offset = self.mc.get_relative_pos() - jmp_adr
assert 0 < offset <= 127
self.mc.overwrite(jmp_adr-1, chr(offset))
+ self.mc.SEGC7()
self.mc.MOV(heap(nursery_free_adr), edi)
def malloc_cond_varsize_frame(self, nursery_free_adr, nursery_top_adr,
@@ -2482,6 +2422,7 @@
if sizeloc is eax:
self.mc.MOV(edi, sizeloc)
sizeloc = edi
+ self.mc.SEGC7()
self.mc.MOV(eax, heap(nursery_free_adr))
if self.cpu.gc_ll_descr.stm:
constsize = self.cpu.get_baseofs_of_frame_field()
@@ -2492,6 +2433,7 @@
self.mc.ADD_rr(edi.value, eax.value)
else:
self.mc.LEA_ra(edi.value, (eax.value, sizeloc.value, 0, 0))
+ self.mc.SEGC7()
self.mc.CMP(edi, heap(nursery_top_adr))
self.mc.J_il8(rx86.Conditions['NA'], 0) # patched later
jmp_adr = self.mc.get_relative_pos()
@@ -2501,6 +2443,7 @@
offset = self.mc.get_relative_pos() - jmp_adr
assert 0 < offset <= 127
self.mc.overwrite(jmp_adr-1, chr(offset))
+ self.mc.SEGC7()
self.mc.MOV(heap(nursery_free_adr), edi)
def malloc_cond_varsize(self, kind, nursery_free_adr, nursery_top_adr,
@@ -2522,6 +2465,7 @@
self.mc.J_il8(rx86.Conditions['A'], 0) # patched later
jmp_adr0 = self.mc.get_relative_pos()
+ self.mc.SEGC7()
self.mc.MOV(eax, heap(nursery_free_adr))
if valid_addressing_size(itemsize):
shift = get_scale(itemsize)
@@ -2542,6 +2486,7 @@
self.mc.AND_ri(edi.value, ~(WORD - 1))
# now edi contains the total size in bytes, rounded up to a multiple
# of WORD, plus nursery_free_adr
+ self.mc.SEGC7()
self.mc.CMP(edi, heap(nursery_top_adr))
self.mc.J_il8(rx86.Conditions['NA'], 0) # patched later
jmp_adr1 = self.mc.get_relative_pos()
@@ -2573,6 +2518,7 @@
# write down the tid, but not if it's the result of the CALL
self.mc.MOV(mem(eax, 0), imm(arraydescr.tid))
# while we're at it, this line is not needed if we've done the CALL
+ self.mc.SEGC7()
self.mc.MOV(heap(nursery_free_adr), edi)
#
offset = self.mc.get_relative_pos() - jmp_location
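
With the SEGC7() insertions above, the ordinary nursery fast path works unchanged under stmgc-c7: the same MOV/LEA/CMP sequence now addresses STM_SEGMENT->nursery_current and nursery_end through %gs, which is why malloc_cond_stm and _cond_allocate_in_nursery_or_slowpath could be deleted. A runnable Python model of the emitted logic (register names in the comments; 'slowpath' stands in for malloc_slowpath):

    class ToyNursery:
        def __init__(self, size):
            self.free = 0               # STM_SEGMENT->nursery_current
            self.top = size             # STM_SEGMENT->nursery_end

    def malloc_fixed_size(nursery, size, slowpath):
        result = nursery.free           # MOV eax, [nursery_free]
        new_free = result + size        # LEA edi, [eax+size]
        if new_free > nursery.top:      # CMP edi, [nursery_top]; JNA
            return slowpath(size)       # CALL malloc_slowpath
        nursery.free = new_free         # MOV [nursery_free], edi
        return result

    n = ToyNursery(64)
    assert malloc_fixed_size(n, 16, lambda sz: 'slow') == 0
    assert malloc_fixed_size(n, 64, lambda sz: 'slow') == 'slow'
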
@@ -2655,16 +2601,18 @@
def genop_discard_stm_read(self, op, arglocs):
assert IS_X86_64, "needed for X86_64_SCRATCH_REG"
mc = self.mc
- rm8reg = X86_64_SCRATCH_REG.value | BYTE_REG_FLAG
- xxxxxx #load STM_SEGMENT->transaction_read_version into rm8reg
+ rmreg = X86_64_SCRATCH_REG.value
+ mc.SEGC7()
+ mc.MOVZX8_rj(rmreg, rstm.adr_transaction_read_version)
+ #
loc_src, loc_tmp = arglocs
if loc_tmp is None:
assert isinstance(loc_src, ImmedLoc)
assert loc_src.value > 0
mem = loc_src.value >> 4
assert rx86.fits_in_32bits(mem)
- tl_segment_prefix(mc)
- mc.MOV8_jr(mem, rm8reg)
+ mc.SEGC7()
+ mc.MOV8_jr(mem, rmreg | rx86.BYTE_REG_FLAG)
else:
assert isinstance(loc_tmp, RegLoc)
if isinstance(loc_src, ImmedLoc):
@@ -2673,8 +2621,8 @@
if loc_tmp is not loc_src:
mc.MOV(loc_tmp, loc_src)
mc.SHR_ri(loc_tmp.value, 4)
- tl_segment_prefix(mc)
- mc.MOV8_mr((loc_tmp.value, 0), rm8reg)
+ mc.SEGC7()
+ mc.MOV8_mr((loc_tmp.value, 0), rmreg | rx86.BYTE_REG_FLAG)
genop_discard_list = [Assembler386.not_implemented_op_discard] * rop._LAST
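
genop_discard_stm_read now emits the stmgc-c7 read barrier inline instead of the old xxxxxx placeholder: load the segment's transaction_read_version byte, then store it into the object's read marker, which lives at address obj >> 4 inside the same segment. A runnable toy model (the dict stands in for the read-marker bytes; names are hypothetical):

    class ToySegment:
        def __init__(self):
            self.transaction_read_version = 1
            self.read_markers = {}      # one marker byte per 16 bytes

    def stm_read(seg, obj):
        # MOVZX8 r11, %gs:[transaction_read_version]
        # MOV8   %gs:[obj >> 4], r11b
        seg.read_markers[obj >> 4] = seg.transaction_read_version

    seg = ToySegment()
    stm_read(seg, 0x12340)
    assert seg.read_markers[0x1234] == 1
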
diff --git a/rpython/jit/backend/x86/codebuf.py b/rpython/jit/backend/x86/codebuf.py
--- a/rpython/jit/backend/x86/codebuf.py
+++ b/rpython/jit/backend/x86/codebuf.py
@@ -1,5 +1,6 @@
from rpython.rtyper.lltypesystem import lltype, rffi
from rpython.rlib.rarithmetic import intmask
+from rpython.rlib.objectmodel import we_are_translated
from rpython.rlib.debug import debug_start, debug_print, debug_stop
from rpython.rlib.debug import have_debug_prints
from rpython.jit.backend.llsupport.asmmemmgr import BlockBuilderMixin
@@ -21,7 +22,8 @@
class MachineCodeBlockWrapper(BlockBuilderMixin,
LocationCodeBuilder,
codebuilder_cls):
- def __init__(self):
+ def __init__(self, cpu):
+ self.stm = cpu.gc_ll_descr.stm
self.init_block_builder()
# a list of relative positions; for each position p, the bytes
# at [p-4:p] encode an absolute address that will need to be
@@ -52,3 +54,30 @@
adr[0] = intmask(adr[0] - p)
valgrind.discard_translations(addr, self.get_relative_pos())
self._dump(addr, "jit-backend-dump", backend_name)
+
+ def in_tl_segment(self, adr):
+ """Makes 'adr' relative to threadlocal-base if we run in STM.
+ Before using such a relative address, call SEGTL()."""
+ if self.stm and we_are_translated():
+ # only for STM and not during tests
+ from rpython.jit.backend.x86 import stmtlocal, rx86
+ result = adr - stmtlocal.threadlocal_base()
+ assert rx86.fits_in_32bits(result)
+ return result
+ return adr
+
+ def SEGTL(self):
+ """Insert segment prefix for thread-local memory if we run
+ in STM and not during testing. This is used to access thread-local
+ data structures like the struct stm_thread_local_s."""
+ if self.stm and we_are_translated():
+ from rpython.jit.backend.x86 import stmtlocal
+ stmtlocal.tl_segment_prefix(self)
+
+ def SEGC7(self):
+ """Insert segment prefix for the stmgc-c7 segment of memory
+ if we run in STM and not during testing. This is used to access
+ any GC object, or things in the STM_SEGMENT structure."""
+ if self.stm and we_are_translated():
+ from rpython.jit.backend.x86 import stmtlocal
+ stmtlocal.c7_segment_prefix(self)
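
in_tl_segment(), SEGTL() and SEGC7() replace the old _get_stm_tl()/_tl_segment_if_stm() helpers on the assembler, so any MachineCodeBlockWrapper can emit segment-prefixed accesses by itself. Each prefix is a single byte written just before the instruction it modifies (0x64 = %fs for the thread-local block, 0x65 = %gs for the stmgc-c7 segment, as in stmtlocal.py below). A runnable toy of the mechanism, ignoring the we_are_translated() test:

    class ToyCodeBuilder:
        def __init__(self, stm):
            self.stm = stm
            self.code = bytearray()
        def writechar(self, c):
            self.code.append(ord(c))
        def SEGTL(self):
            if self.stm:
                self.writechar('\x64')  # %fs: thread-local block
        def SEGC7(self):
            if self.stm:
                self.writechar('\x65')  # %gs: stmgc-c7 segment
        def MOV_toy(self):
            self.writechar('\x8b')      # stand-in for a real MOV

    mc = ToyCodeBuilder(stm=True)
    mc.SEGC7(); mc.MOV_toy()
    assert mc.code == bytearray(b'\x65\x8b')  # %gs-relative load
    mc = ToyCodeBuilder(stm=False)
    mc.SEGC7(); mc.MOV_toy()
    assert mc.code == bytearray(b'\x8b')      # prefix omitted
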
diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py
--- a/rpython/jit/backend/x86/regalloc.py
+++ b/rpython/jit/backend/x86/regalloc.py
@@ -839,13 +839,10 @@
gcmap = self.get_gcmap([eax, edi]) # allocate the gcmap *before*
self.rm.possibly_free_var(tmp_box)
#
- if gc_ll_descr.stm:
- self.assembler.malloc_cond_stm(size, gcmap)
- else:
- self.assembler.malloc_cond(
- gc_ll_descr.get_nursery_free_addr(),
- gc_ll_descr.get_nursery_top_addr(),
- size, gcmap)
+ self.assembler.malloc_cond(
+ gc_ll_descr.get_nursery_free_addr(),
+ gc_ll_descr.get_nursery_top_addr(),
+ size, gcmap)
def consider_call_malloc_nursery_varsize_frame(self, op):
gc_ll_descr = self.assembler.cpu.gc_ll_descr
@@ -893,16 +890,11 @@
#
itemsize = op.getarg(1).getint()
maxlength = (gc_ll_descr.max_size_of_young_obj - WORD * 2) / itemsize
- if gc_ll_descr.stm:
- self.assembler.malloc_cond_varsize_stm(
- op.getarg(0).getint(),
- lengthloc, itemsize, maxlength, gcmap, arraydescr)
- else:
- self.assembler.malloc_cond_varsize(
- op.getarg(0).getint(),
- gc_ll_descr.get_nursery_free_addr(),
- gc_ll_descr.get_nursery_top_addr(),
- lengthloc, itemsize, maxlength, gcmap, arraydescr)
+ self.assembler.malloc_cond_varsize(
+ op.getarg(0).getint(),
+ gc_ll_descr.get_nursery_free_addr(),
+ gc_ll_descr.get_nursery_top_addr(),
+ lengthloc, itemsize, maxlength, gcmap, arraydescr)
def get_gcmap(self, forbidden_regs=[], noregs=False):
frame_depth = self.fm.get_frame_depth()
diff --git a/rpython/jit/backend/x86/runner.py b/rpython/jit/backend/x86/runner.py
--- a/rpython/jit/backend/x86/runner.py
+++ b/rpython/jit/backend/x86/runner.py
@@ -131,7 +131,7 @@
rstm.stop_all_other_threads()
for addr, tgt in looptoken.compiled_loop_token.invalidate_positions:
- mc = codebuf.MachineCodeBlockWrapper()
+ mc = codebuf.MachineCodeBlockWrapper(self)
mc.JMP_l(tgt)
assert mc.get_relative_pos() == 5 # [JMP] [tgt 4 bytes]
mc.copy_to_raw_memory(addr - 1)
diff --git a/rpython/jit/backend/x86/stmtlocal.py b/rpython/jit/backend/x86/stmtlocal.py
--- a/rpython/jit/backend/x86/stmtlocal.py
+++ b/rpython/jit/backend/x86/stmtlocal.py
@@ -32,6 +32,10 @@
else:
mc.writechar('\x64') # %fs:
+def c7_segment_prefix(mc):
+ assert WORD == 8
+ mc.writechar('\x65') # %gs:
+
# special STM functions called directly by the JIT backend
stm_should_break_transaction_fn = rffi.llexternal(
diff --git a/rpython/jit/metainterp/pyjitpl.py b/rpython/jit/metainterp/pyjitpl.py
--- a/rpython/jit/metainterp/pyjitpl.py
+++ b/rpython/jit/metainterp/pyjitpl.py
@@ -1546,7 +1546,7 @@
_callable=lambda : False)
FUNC = lltype.typeOf(self.stm_should_break_transaction).TO
- ei = EffectInfo([], [], [], [],
+ ei = EffectInfo([], [], [], [], [], [],
EffectInfo.EF_CANNOT_RAISE,
oopspecindex=EffectInfo.OS_JIT_STM_SHOULD_BREAK_TRANSACTION,
can_invalidate=False)
diff --git a/rpython/memory/gc/stmgc.py b/rpython/memory/gc/stmgc.py
--- a/rpython/memory/gc/stmgc.py
+++ b/rpython/memory/gc/stmgc.py
@@ -33,7 +33,9 @@
VISIT_FPTR = lltype.Ptr(lltype.FuncType([llmemory.Address], lltype.Void))
- minimal_size_in_nursery = llmemory.sizeof(HDR)
+ JIT_WB_IF_FLAG = 0x01 # value of _STM_GCFLAG_WRITE_BARRIER
+ stm_fast_alloc = 66*1024 # value of _STM_FAST_ALLOC in stmgc.h
+ minimal_size_in_nursery = 16 # hard-coded lower limit
TRANSLATION_PARAMS = {
}
@@ -97,7 +99,7 @@
@classmethod
def JIT_max_size_of_young_obj(cls):
- return cls.GC_NURSERY_SECTION
+ return cls.stm_fast_alloc
@classmethod
def JIT_minimal_size_in_nursery(cls):
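
The new constants mirror stmgc.h: stm_fast_alloc (66*1024, the value of _STM_FAST_ALLOC) is now what the JIT treats as the maximum size of a young object, and JIT_WB_IF_FLAG is the one bit the JIT's write-barrier fast path tests before calling the slow path. A minimal sketch of that test (the helper name is hypothetical):

    JIT_WB_IF_FLAG = 0x01   # value of _STM_GCFLAG_WRITE_BARRIER

    def needs_wb_slowpath(gcflags):
        # emitted as: TEST8 [obj + flags_ofs], 0x01; JNZ slowpath
        return bool(gcflags & JIT_WB_IF_FLAG)

    assert needs_wb_slowpath(0x01)
    assert not needs_wb_slowpath(0x00)
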
diff --git a/rpython/memory/gctransform/framework.py b/rpython/memory/gctransform/framework.py
--- a/rpython/memory/gctransform/framework.py
+++ b/rpython/memory/gctransform/framework.py
@@ -773,6 +773,7 @@
v_typeid], resultvar=op.result)
def _gc_adr_of_gc_attr(self, hop, attrname):
+ assert not self.translator.config.translation.stm
if getattr(self.gcdata.gc, attrname, None) is None:
raise NotImplementedError("gc_adr_of_%s only for generational gcs"
% (attrname,))
@@ -780,7 +781,6 @@
ofs = llmemory.offsetof(self.c_const_gc.concretetype.TO,
'inst_' + attrname)
c_ofs = rmodel.inputconst(lltype.Signed, ofs)
- assert not self.translator.config.translation.stm, "XXX"
v_gc_adr = hop.genop('cast_ptr_to_adr', [self.c_const_gc],
resulttype=llmemory.Address)
hop.genop('adr_add', [v_gc_adr, c_ofs], resultvar=op.result)
diff --git a/rpython/rlib/rstm.py b/rpython/rlib/rstm.py
--- a/rpython/rlib/rstm.py
+++ b/rpython/rlib/rstm.py
@@ -1,4 +1,5 @@
from rpython.rlib.objectmodel import we_are_translated, specialize
+from rpython.rlib.objectmodel import CDefinedIntSymbolic
from rpython.rtyper.lltypesystem import lltype, rffi, rstr, llmemory
from rpython.rtyper.lltypesystem.lloperation import llop
from rpython.rtyper.extregistry import ExtRegistryEntry
@@ -6,39 +7,17 @@
TID = rffi.UINT
-tid_offset = CDefinedIntSymbolic('offsetof(struct rpyobj_s, tid)', default=4)
+tid_offset = CDefinedIntSymbolic('offsetof(struct rpyobj_s, tid)')
+adr_nursery_free = CDefinedIntSymbolic('(long)(&STM_SEGMENT->nursery_current)')
+adr_nursery_top = CDefinedIntSymbolic('(long)(&STM_SEGMENT->nursery_end)')
+adr_transaction_read_version = (
+ CDefinedIntSymbolic('(long)(&STM_SEGMENT->transaction_read_version)'))
-@dont_look_inside
-def get_nursery_current_adr():
- addr = llop.stm_get_adr_of_nursery_current(llmemory.Address)
- return rffi.cast(lltype.Signed, addr)
-
-@dont_look_inside
-def get_nursery_nextlimit_adr():
- addr = llop.stm_get_adr_of_nursery_nextlimit(llmemory.Address)
- return rffi.cast(lltype.Signed, addr)
-
-@dont_look_inside
-def get_active_adr():
- addr = llop.stm_get_adr_of_active(llmemory.Address)
- return rffi.cast(lltype.Signed, addr)
-
-@dont_look_inside
-def get_adr_of_private_rev_num():
- addr = llop.stm_get_adr_of_private_rev_num(llmemory.Address)
- return rffi.cast(lltype.Signed, addr)
-
-@dont_look_inside
-def get_adr_of_read_barrier_cache():
- addr = llop.stm_get_adr_of_read_barrier_cache(llmemory.Address)
- return rffi.cast(lltype.Signed, addr)
-
def jit_stm_transaction_break_point():
if we_are_translated():
llop.jit_stm_transaction_break_point(lltype.Void)
-
def jit_stm_should_break_transaction(if_there_is_no_other):
# if_there_is_no_other means that we use this point only
# if there is no other break point in the trace.
@@ -47,20 +26,11 @@
return llop.jit_stm_should_break_transaction(lltype.Bool,
if_there_is_no_other)
-
@dont_look_inside
def become_inevitable():
llop.stm_become_inevitable(lltype.Void)
@dont_look_inside
-def stop_all_other_threads():
- llop.stm_stop_all_other_threads(lltype.Void)
-
-@dont_look_inside
-def partial_commit_and_resume_other_threads():
- llop.stm_partial_commit_and_resume_other_threads(lltype.Void)
-
-@dont_look_inside
def should_break_transaction():
return we_are_translated() and (
llop.stm_should_break_transaction(lltype.Bool))
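
The removed llop-based getters are replaced by CDefinedIntSymbolic constants: after translation the C backend emits the given expression verbatim, so the JIT backend receives a %gs-relative offset into STM_SEGMENT instead of calling an llop at runtime, while the 'default' argument (0 when omitted) gives the value non-translated tests see. A hedged sketch of the pattern, assuming a pypy checkout on the path:

    from rpython.rlib.objectmodel import CDefinedIntSymbolic

    # Translated: becomes the literal C expression below, an offset
    # that must be dereferenced behind the %gs prefix (mc.SEGC7()).
    # Untranslated: stands for the default value, here 0.
    adr_nursery_free = CDefinedIntSymbolic(
        '(long)(&STM_SEGMENT->nursery_current)')
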