Author: Armin Rigo <[email protected]>
Branch: stmgc-c7
Changeset: r75598:3b8f6c3b4fb3
Date: 2015-01-31 02:30 +0100
http://bitbucket.org/pypy/pypy/changeset/3b8f6c3b4fb3/

Log:    In-progress: don't store the stm_location in the gcmap. It's nice
        to avoid more run-time writes of constants, but the logic to fetch
        it from there is completely missing and would be hard...

diff --git a/rpython/jit/backend/llsupport/gcmap.py 
b/rpython/jit/backend/llsupport/gcmap.py
--- a/rpython/jit/backend/llsupport/gcmap.py
+++ b/rpython/jit/backend/llsupport/gcmap.py
@@ -4,29 +4,15 @@
 from rpython.rlib.rarithmetic import r_uint
 from rpython.jit.backend.llsupport.symbolic import WORD
 
-GCMAP_STM_LOCATION = 2     # xxx add this only if stm
-
-def allocate_gcmap(assembler, frame_depth, fixed_size, stm_location=None):
+def allocate_gcmap(assembler, frame_depth, fixed_size):
     size = frame_depth + fixed_size
-    malloc_size = (size // WORD // 8 + 1) + GCMAP_STM_LOCATION + 1
+    malloc_size = (size // WORD // 8 + 1) + 1
     rawgcmap = assembler.datablockwrapper.malloc_aligned(WORD * malloc_size,
                                                     WORD)
     # set the length field
     rffi.cast(rffi.CArrayPtr(lltype.Signed), rawgcmap)[0] = malloc_size - 1
     gcmap = rffi.cast(lltype.Ptr(jitframe.GCMAP), rawgcmap)
     # zero the area
-    for i in range(malloc_size - 3):
+    for i in range(malloc_size - 1):
         gcmap[i] = r_uint(0)
-    # write the stm_location in the last two words
-    raw_stm_location = extract_raw_stm_location(stm_location)
-    gcmap[malloc_size - 3], gcmap[malloc_size - 2] = raw_stm_location
     return gcmap
-
-def extract_raw_stm_location(stm_location):
-    if stm_location is not None:
-        num = rffi.cast(lltype.Unsigned, stm_location.num)
-        ref = rffi.cast(lltype.Unsigned, stm_location.ref)
-    else:
-        num = r_uint(0)
-        ref = r_uint(0)
-    return (num, ref)
diff --git a/rpython/jit/backend/x86/assembler.py 
b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -5,7 +5,6 @@
 from rpython.jit.backend.llsupport.assembler import (GuardToken, BaseAssembler,
                                                 DEBUG_COUNTER, debug_bridge)
 from rpython.jit.backend.llsupport.asmmemmgr import MachineDataBlockWrapper
-from rpython.jit.backend.llsupport.gcmap import extract_raw_stm_location
 from rpython.jit.metainterp.history import Const, Box, VOID
 from rpython.jit.metainterp.history import AbstractFailDescr, INT, REF, FLOAT
 from rpython.rtyper.lltypesystem import lltype, rffi, rstr, llmemory
@@ -401,19 +400,23 @@
                 # current 'stm_location' so that it is found.  The easiest
                 # is to simply push it on the shadowstack, from its source
                 # location as two extra arguments on the machine stack
-                # (at this point containing: [retaddr][ref][num][obj]...)
+                # (at this point containing:  [usual STM_FRAME_FIXED_SIZE]
+                #                             [obj]
+                #                             [num]
+                #                             [ref]
+                #                             [retaddr])
                 # XXX this should also be done if 'for_frame' is true...
-                mc.MOV(esi, self.heap_shadowstack_top())
+                mc.MOV_rs(esi.value, STM_SHADOWSTACK_BASE_OFS + 4 * WORD)
+                # esi = base address in the shadowstack + 1
+                # write the marker to [esi - 1] and [esi + 7]
                 mc.MOV_rs(edi.value, 2 * WORD)   # [num]
                 # do here the 'num = (num<<1) + 1' rather than at the caller
                 # site, to increase the chances that it can use PUSH_i8
                 mc.LEA_ra(edi.value, (self.SEGMENT_NO, rx86.NO_BASE_REGISTER,
                                       edi.value, 1, +1))
-                mc.MOV_mr((self.SEGMENT_NO, esi.value, 0), edi.value)
+                mc.MOV_mr((self.SEGMENT_NO, esi.value, -1), edi.value)
                 mc.MOV_rs(edi.value, 1 * WORD)   # [ref]
-                mc.MOV_mr((self.SEGMENT_NO, esi.value, WORD), edi.value)
-                mc.LEA_rm(esi.value, (self.SEGMENT_NO, esi.value, 2 * WORD))
-                mc.MOV(self.heap_shadowstack_top(), esi)
+                mc.MOV_mr((self.SEGMENT_NO, esi.value, +7), edi.value)
                 mc.MOV_rs(edi.value, 3 * WORD)   # [obj]
             elif IS_X86_32:
                 # we have 2 extra words on stack for retval and we pass 1 extra
@@ -463,11 +466,6 @@
         #
 
         if not for_frame:
-            if self.cpu.gc_ll_descr.stm:
-                # SUB touches CPU flags
-                mc.MOV(esi, self.heap_shadowstack_top())
-                mc.LEA_rm(esi.value, (self.SEGMENT_NO, esi.value, -2 * WORD))
-                mc.MOV(self.heap_shadowstack_top(), esi)
             if IS_X86_32:
                 # ADD touches CPU flags
                 mc.LEA_rs(esp.value, 2 * WORD)
@@ -870,9 +868,14 @@
         # again (ensured by the code calling the loop))
         mc = self.mc
         mc.MOV(ebx, self.heap_shadowstack_top())
-        mc.MOV_mr((self.SEGMENT_NO, ebx.value, 0), ebp.value)
-                                                      # MOV [ebx], ebp
         if self.cpu.gc_ll_descr.stm:
+            # the first two words are usually the stm_location marker,
+            # but for now it can be invalid (as long as it's not fully
+            # random)
+            mc.MOV_mr((self.SEGMENT_NO, ebx.value, 0 * WORD), ebp.value)
+            mc.MOV_mr((self.SEGMENT_NO, ebx.value, 1 * WORD), ebp.value)
+            mc.MOV_mr((self.SEGMENT_NO, ebx.value, 2 * WORD), ebp.value)
+
             # inlining stm_rewind_jmp_enterframe()
             r11 = X86_64_SCRATCH_REG
             rjh = self.heap_rjthread_head()
@@ -880,13 +883,15 @@
             mc.MOV(r11, rjh)                         # MOV r11, [rjthread.head]
             mc.MOV_sr(STM_SHADOWSTACK_BASE_OFS, ebx.value)
                                                      # MOV [esp+ssbase], ebx
-            mc.ADD_ri(ebx.value, WORD-1)             # ADD ebx, 7
+            mc.ADD_ri(ebx.value, 3*WORD-1)           # ADD ebx, 23
             mc.MOV_sr(STM_PREV_OFS, r11.value)       # MOV [esp+prev], r11
             mc.MOV(self.heap_shadowstack_top(), ebx) # MOV [rootstacktop], ebx
             mc.LEA_rs(r11.value, STM_JMPBUF_OFS)     # LEA r11, [esp+bufofs]
             mc.MOV(rjh, r11)                         # MOV [rjthread.head], r11
         #
         else:
+            mc.MOV_mr((self.SEGMENT_NO, ebx.value, 0), ebp.value)
+                                                     # MOV [ebx], ebp
             mc.ADD_ri(ebx.value, WORD)               # ADD ebx, WORD
             mc.MOV(self.heap_shadowstack_top(), ebx) # MOV [rootstacktop], ebx
 
@@ -1934,6 +1939,22 @@
         self.mc.JMP(imm(target))
         return startpos
 
+    def update_stm_location(self, extra_stack=0):
+        if self.cpu.gc_ll_descr.stm:
+            num, ref = self._regalloc.extract_raw_stm_location()
+            mc.MOV_rs(r11.value, STM_SHADOWSTACK_BASE_OFS + extra_stack)
+            # r11 = base address in the shadowstack + 1
+            # write the marker to [r11 - 1] and [r11 + 7]
+            for (targetofs, number) in [(-1, num), (+7, ref)]:
+                if rx86.fits_in_32bits(number):
+                    mc.MOV_mi((self.SEGMENT_NO, r11.value, targetofs), number)
+                else:
+                    mc.MOV32_mi((self.SEGMENT_NO, r11.value, targetofs),
+                                rffi.cast(lltype.Signed,
+                                          rffi.cast(rffi.INT, number)))
+                    mc.MOV32_mi((self.SEGMENT_NO, r11.value, targetofs + 4),
+                                number >> 32)
+
     def push_gcmap(self, mc, gcmap, push=False, mov=False, store=False):
         if push:
             mc.PUSH(imm(rffi.cast(lltype.Signed, gcmap)))
@@ -2267,10 +2288,9 @@
                 # still ok.  The one or three words pushed here are removed
                 # by the callee.
                 assert IS_X86_64
-                num, ref = extract_raw_stm_location(
-                    self._regalloc.stm_location)
-                mc.PUSH(imm(rffi.cast(lltype.Signed, num)))
-                mc.PUSH(imm(rffi.cast(lltype.Signed, ref)))
+                num, ref = self._regalloc.extract_raw_stm_location()
+                mc.PUSH(imm(num))
+                mc.PUSH(imm(ref))
         if is_frame and align_stack:
             mc.SUB_ri(esp.value, 16 - WORD) # erase the return address
         mc.CALL(imm(self.wb_slowpath[helper_num]))
@@ -2433,6 +2453,7 @@
         self.mc.J_il8(rx86.Conditions['Z'], 0) # patched later
         jmp_adr = self.mc.get_relative_pos()
         #
+        self.update_stm_location()
         self.push_gcmap(self.mc, gcmap, store=True)
         #
         # first save away the 4 registers from 'cond_call_register_arguments'
diff --git a/rpython/jit/backend/x86/callbuilder.py 
b/rpython/jit/backend/x86/callbuilder.py
--- a/rpython/jit/backend/x86/callbuilder.py
+++ b/rpython/jit/backend/x86/callbuilder.py
@@ -84,6 +84,7 @@
             self.asm.set_extra_stack_depth(self.mc, -self.current_esp)
         noregs = self.asm.cpu.gc_ll_descr.is_shadow_stack()
         gcmap = self.asm._regalloc.get_gcmap([eax], noregs=noregs)
+        self.asm.update_stm_location(-self.current_esp)
         self.asm.push_gcmap(self.mc, gcmap, store=True)
 
     def pop_gcmap(self):
@@ -204,7 +205,7 @@
             # in 'ebx'), and if not, we fall back to 'reacqgil_addr'.
             mc.J_il8(rx86.Conditions['NE'], 0)
             jne_location = mc.get_relative_pos()
-            # here, ecx is zero (so rpy_fastgil was not acquired)
+            # here, ecx (=old_value) is zero (so rpy_fastgil was not acquired)
             rst = gcrootmap.get_root_stack_top_addr()
             mc = self.mc
             mc.CMP(ebx, self.asm.heap_tl(rst))
@@ -212,6 +213,7 @@
             je_location = mc.get_relative_pos()
             # revert the rpy_fastgil acquired above, so that the
             # general 'reacqgil_addr' below can acquire it again...
+            assert ecx is old_value
             mc.MOV(heap(self.asm.SEGMENT_NO, fastgil), ecx)
             # patch the JNE above
             offset = mc.get_relative_pos() - jne_location
diff --git a/rpython/jit/backend/x86/regalloc.py 
b/rpython/jit/backend/x86/regalloc.py
--- a/rpython/jit/backend/x86/regalloc.py
+++ b/rpython/jit/backend/x86/regalloc.py
@@ -325,10 +325,7 @@
             self.xrm.position = i
             #
             if op.stm_location is not None:
-                if (self.stm_location is None or
-                    self.stm_location.num != op.stm_location.num or
-                    self.stm_location.ref != op.stm_location.ref):
-                    self.stm_location = op.stm_location
+                self.stm_location = op.stm_location
             #
             if op.has_no_side_effect() and op.result not in self.longevity:
                 i += 1
@@ -945,9 +942,18 @@
             gc_ll_descr.get_nursery_top_addr(),
             lengthloc, itemsize, maxlength, gcmap, arraydescr)
 
+    def extract_raw_stm_location(self):
+        if self.stm_location is not None:
+            num = rffi.cast(lltype.Signed, self.stm_location.num)
+            ref = rffi.cast(lltype.Signed, self.stm_location.ref)
+        else:
+            num = 0
+            ref = 0
+        return (num, ref)
+
     def get_empty_gcmap(self, frame_depth):
         return allocate_gcmap(self.assembler, frame_depth,
-                              JITFRAME_FIXED_SIZE, self.stm_location)
+                              JITFRAME_FIXED_SIZE)
 
     def get_gcmap(self, forbidden_regs=[], noregs=False):
         frame_depth = self.fm.get_frame_depth()
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to