Author: Armin Rigo <[email protected]>
Branch: stmgc-c7
Changeset: r75598:3b8f6c3b4fb3
Date: 2015-01-31 02:30 +0100
http://bitbucket.org/pypy/pypy/changeset/3b8f6c3b4fb3/
Log: In-progress: don't store the stm_location in the gcmap. It's nice
to avoid more run-time writes of constants, but the logic to fetch
it from there is completely missing and would be hard...
diff --git a/rpython/jit/backend/llsupport/gcmap.py
b/rpython/jit/backend/llsupport/gcmap.py
--- a/rpython/jit/backend/llsupport/gcmap.py
+++ b/rpython/jit/backend/llsupport/gcmap.py
@@ -4,29 +4,15 @@
from rpython.rlib.rarithmetic import r_uint
from rpython.jit.backend.llsupport.symbolic import WORD
-GCMAP_STM_LOCATION = 2 # xxx add this only if stm
-
-def allocate_gcmap(assembler, frame_depth, fixed_size, stm_location=None):
+def allocate_gcmap(assembler, frame_depth, fixed_size):
size = frame_depth + fixed_size
- malloc_size = (size // WORD // 8 + 1) + GCMAP_STM_LOCATION + 1
+ malloc_size = (size // WORD // 8 + 1) + 1
rawgcmap = assembler.datablockwrapper.malloc_aligned(WORD * malloc_size,
WORD)
# set the length field
rffi.cast(rffi.CArrayPtr(lltype.Signed), rawgcmap)[0] = malloc_size - 1
gcmap = rffi.cast(lltype.Ptr(jitframe.GCMAP), rawgcmap)
# zero the area
- for i in range(malloc_size - 3):
+ for i in range(malloc_size - 1):
gcmap[i] = r_uint(0)
- # write the stm_location in the last two words
- raw_stm_location = extract_raw_stm_location(stm_location)
- gcmap[malloc_size - 3], gcmap[malloc_size - 2] = raw_stm_location
return gcmap
-
-def extract_raw_stm_location(stm_location):
- if stm_location is not None:
- num = rffi.cast(lltype.Unsigned, stm_location.num)
- ref = rffi.cast(lltype.Unsigned, stm_location.ref)
- else:
- num = r_uint(0)
- ref = r_uint(0)
- return (num, ref)
diff --git a/rpython/jit/backend/x86/assembler.py
b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -5,7 +5,6 @@
from rpython.jit.backend.llsupport.assembler import (GuardToken, BaseAssembler,
DEBUG_COUNTER, debug_bridge)
from rpython.jit.backend.llsupport.asmmemmgr import MachineDataBlockWrapper
-from rpython.jit.backend.llsupport.gcmap import extract_raw_stm_location
from rpython.jit.metainterp.history import Const, Box, VOID
from rpython.jit.metainterp.history import AbstractFailDescr, INT, REF, FLOAT
from rpython.rtyper.lltypesystem import lltype, rffi, rstr, llmemory
@@ -401,19 +400,23 @@
# current 'stm_location' so that it is found. The easiest
# is to simply push it on the shadowstack, from its source
# location as two extra arguments on the machine stack
- # (at this point containing: [retaddr][ref][num][obj]...)
+ # (at this point containing: [usual STM_FRAME_FIXED_SIZE]
+ # [obj]
+ # [num]
+ # [ref]
+ # [retaddr])
# XXX this should also be done if 'for_frame' is true...
- mc.MOV(esi, self.heap_shadowstack_top())
+ mc.MOV_rs(esi.value, STM_SHADOWSTACK_BASE_OFS + 4 * WORD)
+ # esi = base address in the shadowstack + 1
+ # write the marker to [esi - 1] and [esi + 7]
mc.MOV_rs(edi.value, 2 * WORD) # [num]
# do here the 'num = (num<<1) + 1' rather than at the caller
# site, to increase the chances that it can use PUSH_i8
mc.LEA_ra(edi.value, (self.SEGMENT_NO, rx86.NO_BASE_REGISTER,
edi.value, 1, +1))
- mc.MOV_mr((self.SEGMENT_NO, esi.value, 0), edi.value)
+ mc.MOV_mr((self.SEGMENT_NO, esi.value, -1), edi.value)
mc.MOV_rs(edi.value, 1 * WORD) # [ref]
- mc.MOV_mr((self.SEGMENT_NO, esi.value, WORD), edi.value)
- mc.LEA_rm(esi.value, (self.SEGMENT_NO, esi.value, 2 * WORD))
- mc.MOV(self.heap_shadowstack_top(), esi)
+ mc.MOV_mr((self.SEGMENT_NO, esi.value, +7), edi.value)
mc.MOV_rs(edi.value, 3 * WORD) # [obj]
elif IS_X86_32:
# we have 2 extra words on stack for retval and we pass 1 extra
@@ -463,11 +466,6 @@
#
if not for_frame:
- if self.cpu.gc_ll_descr.stm:
- # SUB touches CPU flags
- mc.MOV(esi, self.heap_shadowstack_top())
- mc.LEA_rm(esi.value, (self.SEGMENT_NO, esi.value, -2 * WORD))
- mc.MOV(self.heap_shadowstack_top(), esi)
if IS_X86_32:
# ADD touches CPU flags
mc.LEA_rs(esp.value, 2 * WORD)
@@ -870,9 +868,14 @@
# again (ensured by the code calling the loop))
mc = self.mc
mc.MOV(ebx, self.heap_shadowstack_top())
- mc.MOV_mr((self.SEGMENT_NO, ebx.value, 0), ebp.value)
- # MOV [ebx], ebp
if self.cpu.gc_ll_descr.stm:
+ # the first two words are usually the stm_location marker,
+ # but for now it can be invalid (as long as it's not fully
+ # random)
+ mc.MOV_mr((self.SEGMENT_NO, ebx.value, 0 * WORD), ebp.value)
+ mc.MOV_mr((self.SEGMENT_NO, ebx.value, 1 * WORD), ebp.value)
+ mc.MOV_mr((self.SEGMENT_NO, ebx.value, 2 * WORD), ebp.value)
+
# inlining stm_rewind_jmp_enterframe()
r11 = X86_64_SCRATCH_REG
rjh = self.heap_rjthread_head()
@@ -880,13 +883,15 @@
mc.MOV(r11, rjh) # MOV r11, [rjthread.head]
mc.MOV_sr(STM_SHADOWSTACK_BASE_OFS, ebx.value)
# MOV [esp+ssbase], ebx
- mc.ADD_ri(ebx.value, WORD-1) # ADD ebx, 7
+ mc.ADD_ri(ebx.value, 3*WORD-1) # ADD ebx, 23
mc.MOV_sr(STM_PREV_OFS, r11.value) # MOV [esp+prev], r11
mc.MOV(self.heap_shadowstack_top(), ebx) # MOV [rootstacktop], ebx
mc.LEA_rs(r11.value, STM_JMPBUF_OFS) # LEA r11, [esp+bufofs]
mc.MOV(rjh, r11) # MOV [rjthread.head], r11
#
else:
+ mc.MOV_mr((self.SEGMENT_NO, ebx.value, 0), ebp.value)
+ # MOV [ebx], ebp
mc.ADD_ri(ebx.value, WORD) # ADD ebx, WORD
mc.MOV(self.heap_shadowstack_top(), ebx) # MOV [rootstacktop], ebx
@@ -1934,6 +1939,22 @@
self.mc.JMP(imm(target))
return startpos
+ def update_stm_location(self, extra_stack=0):
+ if self.cpu.gc_ll_descr.stm:
+ num, ref = self._regalloc.extract_raw_stm_location()
+ mc.MOV_rs(r11.value, STM_SHADOWSTACK_BASE_OFS + extra_stack)
+ # r11 = base address in the shadowstack + 1
+ # write the marker to [r11 - 1] and [r11 + 7]
+ for (targetofs, number) in [(-1, num), (+7, ref)]:
+ if rx86.fits_in_32bits(number):
+ mc.MOV_mi((self.SEGMENT_NO, r11.value, targetofs), number)
+ else:
+ mc.MOV32_mi((self.SEGMENT_NO, r11.value, targetofs),
+ rffi.cast(lltype.Signed,
+ rffi.cast(rffi.INT, number)))
+ mc.MOV32_mi((self.SEGMENT_NO, r11.value, targetofs + 4),
+ number >> 32)
+
def push_gcmap(self, mc, gcmap, push=False, mov=False, store=False):
if push:
mc.PUSH(imm(rffi.cast(lltype.Signed, gcmap)))
@@ -2267,10 +2288,9 @@
# still ok. The one or three words pushed here are removed
# by the callee.
assert IS_X86_64
- num, ref = extract_raw_stm_location(
- self._regalloc.stm_location)
- mc.PUSH(imm(rffi.cast(lltype.Signed, num)))
- mc.PUSH(imm(rffi.cast(lltype.Signed, ref)))
+ num, ref = self._regalloc.extract_raw_stm_location()
+ mc.PUSH(imm(num))
+ mc.PUSH(imm(ref))
if is_frame and align_stack:
mc.SUB_ri(esp.value, 16 - WORD) # erase the return address
mc.CALL(imm(self.wb_slowpath[helper_num]))
@@ -2433,6 +2453,7 @@
self.mc.J_il8(rx86.Conditions['Z'], 0) # patched later
jmp_adr = self.mc.get_relative_pos()
#
+ self.update_stm_location()
self.push_gcmap(self.mc, gcmap, store=True)
#
# first save away the 4 registers from 'cond_call_register_arguments'
diff --git a/rpython/jit/backend/x86/callbuilder.py
b/rpython/jit/backend/x86/callbuilder.py
--- a/rpython/jit/backend/x86/callbuilder.py
+++ b/rpython/jit/backend/x86/callbuilder.py
@@ -84,6 +84,7 @@
self.asm.set_extra_stack_depth(self.mc, -self.current_esp)
noregs = self.asm.cpu.gc_ll_descr.is_shadow_stack()
gcmap = self.asm._regalloc.get_gcmap([eax], noregs=noregs)
+ self.asm.update_stm_location(-self.current_esp)
self.asm.push_gcmap(self.mc, gcmap, store=True)
def pop_gcmap(self):
@@ -204,7 +205,7 @@
# in 'ebx'), and if not, we fall back to 'reacqgil_addr'.
mc.J_il8(rx86.Conditions['NE'], 0)
jne_location = mc.get_relative_pos()
- # here, ecx is zero (so rpy_fastgil was not acquired)
+ # here, ecx (=old_value) is zero (so rpy_fastgil was not acquired)
rst = gcrootmap.get_root_stack_top_addr()
mc = self.mc
mc.CMP(ebx, self.asm.heap_tl(rst))
@@ -212,6 +213,7 @@
je_location = mc.get_relative_pos()
# revert the rpy_fastgil acquired above, so that the
# general 'reacqgil_addr' below can acquire it again...
+ assert ecx is old_value
mc.MOV(heap(self.asm.SEGMENT_NO, fastgil), ecx)
# patch the JNE above
offset = mc.get_relative_pos() - jne_location
diff --git a/rpython/jit/backend/x86/regalloc.py
b/rpython/jit/backend/x86/regalloc.py
--- a/rpython/jit/backend/x86/regalloc.py
+++ b/rpython/jit/backend/x86/regalloc.py
@@ -325,10 +325,7 @@
self.xrm.position = i
#
if op.stm_location is not None:
- if (self.stm_location is None or
- self.stm_location.num != op.stm_location.num or
- self.stm_location.ref != op.stm_location.ref):
- self.stm_location = op.stm_location
+ self.stm_location = op.stm_location
#
if op.has_no_side_effect() and op.result not in self.longevity:
i += 1
@@ -945,9 +942,18 @@
gc_ll_descr.get_nursery_top_addr(),
lengthloc, itemsize, maxlength, gcmap, arraydescr)
+ def extract_raw_stm_location(self):
+ if self.stm_location is not None:
+ num = rffi.cast(lltype.Signed, self.stm_location.num)
+ ref = rffi.cast(lltype.Signed, self.stm_location.ref)
+ else:
+ num = 0
+ ref = 0
+ return (num, ref)
+
def get_empty_gcmap(self, frame_depth):
return allocate_gcmap(self.assembler, frame_depth,
- JITFRAME_FIXED_SIZE, self.stm_location)
+ JITFRAME_FIXED_SIZE)
def get_gcmap(self, forbidden_regs=[], noregs=False):
frame_depth = self.fm.get_frame_depth()
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit