Author: Armin Rigo <[email protected]>
Branch: jitframe-on-heap
Changeset: r61725:3b36c7c6cacd
Date: 2013-02-24 09:47 +0100
http://bitbucket.org/pypy/pypy/changeset/3b36c7c6cacd/

Log:    asmgcc's call_release_gil: in-progress, the first test passes.

diff --git a/rpython/jit/backend/x86/arch.py b/rpython/jit/backend/x86/arch.py
--- a/rpython/jit/backend/x86/arch.py
+++ b/rpython/jit/backend/x86/arch.py
@@ -40,3 +40,4 @@
     PASS_ON_MY_FRAME = 12
     JITFRAME_FIXED_SIZE = 28 # 13 GPR + 15 XMM
 
+assert PASS_ON_MY_FRAME >= 11       # asmgcc needs at least JIT_USE_WORDS + 2
diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -65,7 +65,6 @@
         self.datablockwrapper = None
         self.stack_check_slowpath = 0
         self.propagate_exception_path = 0
-        self.gcrootmap_retaddr_forced = 0
         self.teardown()
 
     def set_debug(self, v):
@@ -1068,11 +1067,18 @@
                     self.implement_guard(guard_token, checkfalsecond)
         return genop_cmp_guard_float
 
+    def _is_asmgcc(self):
+        gcrootmap = self.cpu.gc_ll_descr.gcrootmap
+        return bool(gcrootmap) and not gcrootmap.is_shadow_stack
+
     def _emit_call(self, x, arglocs, start=0, tmp=eax,
-                   argtypes=None, callconv=FFI_DEFAULT_ABI, can_collect=True):
+                   argtypes=None, callconv=FFI_DEFAULT_ABI, can_collect=1,
+                   stack_max=PASS_ON_MY_FRAME):
+        if can_collect == 1 and not self._is_asmgcc():
+            can_collect = 2    # don't bother with jf_extra_stack_depth
         if IS_X86_64:
             return self._emit_call_64(x, arglocs, start, argtypes,
-                                      can_collect=can_collect)
+                                      can_collect, stack_max)
         stack_depth = 0
         n = len(arglocs)
         for i in range(start, n):
@@ -1083,11 +1089,11 @@
                 else:
                     stack_depth += 1
             stack_depth += loc.get_width() // WORD
-        if stack_depth > PASS_ON_MY_FRAME:
+        if stack_depth > stack_max:
             stack_depth = align_stack_words(stack_depth)
-            align = (stack_depth - PASS_ON_MY_FRAME)
+            align = (stack_depth - stack_max)
             self.mc.SUB_ri(esp.value, align * WORD)
-            if can_collect:
+            if can_collect == 1:
                 ofs = self.cpu.get_ofs_of_frame_field('jf_extra_stack_depth')
                 self.mc.MOV_bi(ofs, align * WORD)
         else:
@@ -1120,7 +1126,7 @@
         self.mc.CALL(x)
         if can_collect:
             self._reload_frame_if_necessary(self.mc)
-            if align:
+            if align and can_collect == 1:
                 ofs = self.cpu.get_ofs_of_frame_field('jf_extra_stack_depth')
                 self.mc.MOV_bi(ofs, 0)
             self.pop_gcmap(self.mc)
@@ -1137,7 +1143,8 @@
         # the called function just added 'p' to ESP, by subtracting it again.
         self.mc.SUB_ri(esp.value, p)
 
-    def _emit_call_64(self, x, arglocs, start, argtypes, can_collect=True):
+    def _emit_call_64(self, x, arglocs, start, argtypes,
+                      can_collect, stack_max):
         src_locs = []
         dst_locs = []
         xmm_src_locs = []
@@ -1159,10 +1166,10 @@
         stack_depth = (max(all_args - floats - len(unused_gpr), 0) +
                        max(floats - len(unused_xmm), 0))
         align = 0
-        if stack_depth > PASS_ON_MY_FRAME:
+        if stack_depth > stack_max:
             stack_depth = align_stack_words(stack_depth)
-            align = (stack_depth - PASS_ON_MY_FRAME)
-            if can_collect:
+            align = (stack_depth - stack_max)
+            if can_collect == 1:
                 ofs = self.cpu.get_ofs_of_frame_field('jf_extra_stack_depth')
                 self.mc.MOV_bi(ofs, align * WORD)
             self.mc.SUB_ri(esp.value, align * WORD)
@@ -1225,7 +1232,7 @@
         self.mc.CALL(x)
         if can_collect:
             self._reload_frame_if_necessary(self.mc)
-            if align:
+            if align and can_collect == 1:
                 ofs = self.cpu.get_ofs_of_frame_field('jf_extra_stack_depth')
                 self.mc.MOV_bi(ofs, 0)
         if align:
@@ -2030,9 +2037,19 @@
         descr = op.getdescr()
         assert isinstance(descr, CallDescr)
 
+        stack_max = PASS_ON_MY_FRAME
+        if self._is_asmgcc() and op.getopnum() == rop.CALL_RELEASE_GIL:
+            from rpython.memory.gctransform import asmgcroot
+            stack_max -= asmgcroot.JIT_USE_WORDS
+            can_collect = 2    # don't write jf_extra_stack_depth
+        else:
+            can_collect = 1
+
         self._emit_call(x, arglocs, 3, tmp=tmp,
                         argtypes=descr.get_arg_types(),
-                        callconv=descr.get_call_conv())
+                        callconv=descr.get_call_conv(),
+                        can_collect=can_collect,
+                        stack_max=stack_max)
 
         if IS_X86_32 and isinstance(resloc, FrameLoc) and resloc.type == FLOAT:
             # a float or a long long return
@@ -2103,79 +2120,36 @@
         self._emit_guard_not_forced(guard_token)
 
     def call_release_gil(self, gcrootmap, save_registers):
-        # First, we need to save away the registers listed in
-        # 'save_registers' that are not callee-save.  XXX We assume that
-        # the XMM registers won't be modified.  We store them in
-        # [ESP+4], [ESP+8], etc.; on x86-32 we leave enough room in [ESP]
-        # for the single argument to closestack_addr below.
-        if IS_X86_32:
-            p = WORD
-        elif IS_X86_64:
-            p = 0
-        for reg in self._regalloc.rm.save_around_call_regs:
-            if reg in save_registers:
-                self.mc.MOV_sr(p, reg.value)
-                p += WORD
-        #
         if gcrootmap.is_shadow_stack:
             args = []
         else:
-            # note that regalloc.py used save_all_regs=True to save all
-            # registers, so we don't have to care about saving them (other
-            # than ebp) in the close_stack_struct.  But if they are registers
-            # like %eax that would be destroyed by this call, *and* they are
-            # used by arglocs for the *next* call, then trouble; for now we
-            # will just push/pop them.
-            raise NotImplementedError
-            xxx
             from rpython.memory.gctransform import asmgcroot
-            css = self._regalloc.close_stack_struct
-            if css == 0:
-                use_words = (2 + max(asmgcroot.INDEX_OF_EBP,
-                                     asmgcroot.FRAME_PTR) + 1)
-                pos = self._regalloc.fm.reserve_location_in_frame(use_words)
-                css = get_ebp_ofs(pos + use_words - 1)
-                self._regalloc.close_stack_struct = css
-            # The location where the future CALL will put its return address
-            # will be [ESP-WORD].  But we can't use that as the next frame's
-            # top address!  As the code after releasegil() runs without the
-            # GIL, it might not be set yet by the time we need it (very
-            # unlikely), or it might be overwritten by the following call
-            # to reaquiregil() (much more likely).  So we hack even more
-            # and use a dummy location containing a dummy value (a pointer
-            # to itself) which we pretend is the return address :-/ :-/ :-/
-            # It prevents us to store any %esp-based stack locations but we
-            # don't so far.
-            adr = self.datablockwrapper.malloc_aligned(WORD, WORD)
-            rffi.cast(rffi.CArrayPtr(lltype.Signed), adr)[0] = adr
-            self.gcrootmap_retaddr_forced = adr
-            frame_ptr = css + WORD * (2+asmgcroot.FRAME_PTR)
-            if rx86.fits_in_32bits(adr):
-                self.mc.MOV_bi(frame_ptr, adr)          # MOV [css.frame], adr
-            else:
-                self.mc.MOV_ri(eax.value, adr)          # MOV EAX, adr
-                self.mc.MOV_br(frame_ptr, eax.value)    # MOV [css.frame], EAX
+            # build a 'css' structure on the stack: 2 words for the linkage,
+            # and 5/7 words as described for asmgcroot.ASM_FRAMEDATA, for a
+            # total size of JIT_USE_WORDS.  This structure is found at
+            # [ESP+css].
+            css = WORD * (PASS_ON_MY_FRAME - asmgcroot.JIT_USE_WORDS)
+            assert css >= 2
             # Save ebp
             index_of_ebp = css + WORD * (2+asmgcroot.INDEX_OF_EBP)
-            self.mc.MOV_br(index_of_ebp, ebp.value)     # MOV [css.ebp], EBP
-            # Call the closestack() function (also releasing the GIL)
+            self.mc.MOV_sr(index_of_ebp, ebp.value)  # MOV [css.ebp], EBP
+            # Save the "return address": we pretend that it's css
             if IS_X86_32:
                 reg = eax
             elif IS_X86_64:
                 reg = edi
-            self.mc.LEA_rb(reg.value, css)
+            self.mc.LEA_rs(reg.value, css)           # LEA reg, [css]
+            frame_ptr = css + WORD * (2+asmgcroot.FRAME_PTR)
+            self.mc.MOV_sr(frame_ptr, reg.value)     # MOV [css.frame], reg
+            # Set up jf_extra_stack_depth to pretend that the return address
+            # was at css, and so our stack frame is supposedly shorter by
+            # (css+1) words
+            extra_ofs = self.cpu.get_ofs_of_frame_field('jf_extra_stack_depth')
+            self.mc.MOV_bi(extra_ofs, (-css-1) * WORD)
+            # Call the closestack() function (also releasing the GIL)
             args = [reg]
         #
         self._emit_call(imm(self.releasegil_addr), args)
-        # Finally, restore the registers saved above.
-        if IS_X86_32:
-            p = WORD
-        elif IS_X86_64:
-            p = 0
-        for reg in self._regalloc.rm.save_around_call_regs:
-            if reg in save_registers:
-                self.mc.MOV_rs(reg.value, p)
-                p += WORD
 
     def call_reacquire_gil(self, gcrootmap, save_loc):
         # save the previous result (eax/xmm0) into the stack temporarily.
@@ -2187,18 +2161,15 @@
         if gcrootmap.is_shadow_stack:
             args = []
         else:
-            raise NotImplementedError
-            xxx
-            assert self.gcrootmap_retaddr_forced == -1, (
-                      "missing mark_gc_roots() in CALL_RELEASE_GIL")
-            self.gcrootmap_retaddr_forced = 0
-            css = self._regalloc.close_stack_struct
-            assert css != 0
+            from rpython.memory.gctransform import asmgcroot
+            extra_ofs = self.cpu.get_ofs_of_frame_field('jf_extra_stack_depth')
+            self.mc.MOV_bi(extra_ofs, 0)
+            css = WORD * (PASS_ON_MY_FRAME - asmgcroot.JIT_USE_WORDS)
             if IS_X86_32:
                 reg = eax
             elif IS_X86_64:
                 reg = edi
-            self.mc.LEA_rb(reg.value, css)
+            self.mc.LEA_rs(reg.value, css)
             args = [reg]
         self._emit_call(imm(self.reacqgil_addr), args)
         # restore the result from the stack
diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py
--- a/rpython/jit/backend/x86/regalloc.py
+++ b/rpython/jit/backend/x86/regalloc.py
@@ -141,7 +141,6 @@
         self.translate_support_code = translate_support_code
         # to be read/used by the assembler too
         self.jump_target_descr = None
-        self.close_stack_struct = 0
         self.final_jump_op = None
 
     def _prepare(self, inputargs, operations, allgcrefs):
@@ -797,7 +796,17 @@
         assert guard_op is not None
         self._consider_call(op, guard_op)
 
-    consider_call_release_gil = consider_call_may_force
+    def consider_call_release_gil(self, op, guard_op):
+        # We spill the arguments to the stack, because we need to do 3 calls:
+        # call_release_gil(), the_real_c_function(), and call_reacquire_gil().
+        # The arguments are used on the second call only.  XXX we assume
+        # that the XMM arguments won't be modified by call_release_gil().
+        for i in range(op.numargs()):
+            loc = self.loc(op.getarg(i))
+            if loc in self.rm.save_around_call_regs:
+                self.rm.force_spill_var(op.getarg(i))
+        assert guard_op is not None
+        self._consider_call(op, guard_op)
 
     def consider_call_malloc_gc(self, op):
         self._consider_call(op)
diff --git a/rpython/memory/gctransform/asmgcroot.py b/rpython/memory/gctransform/asmgcroot.py
--- a/rpython/memory/gctransform/asmgcroot.py
+++ b/rpython/memory/gctransform/asmgcroot.py
@@ -654,6 +654,8 @@
     INDEX_OF_EBP      = 3
     FRAME_PTR         = CALLEE_SAVED_REGS    # the frame is at index 4 in the array
 
+JIT_USE_WORDS = 2 + FRAME_PTR + 1
+
 ASM_CALLBACK_PTR = lltype.Ptr(lltype.FuncType([], lltype.Void))
 
 # used internally by walk_stack_from()
_______________________________________________
pypy-commit mailing list
[email protected]
http://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to