Author: Armin Rigo <[email protected]>
Branch: emit-call-x86
Changeset: r64310:575cd15c0384
Date: 2013-05-19 11:32 +0200
http://bitbucket.org/pypy/pypy/changeset/575cd15c0384/

Log:    In-progress.

diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -25,12 +25,11 @@
     RegLoc, FrameLoc, ConstFloatLoc, ImmedLoc, AddressLoc, imm,
     imm0, imm1, FloatImmedLoc, RawEbpLoc, RawEspLoc)
 from rpython.rlib.objectmodel import we_are_translated
-from rpython.jit.backend.x86 import rx86, codebuf
+from rpython.jit.backend.x86 import rx86, codebuf, callbuilder
 from rpython.jit.metainterp.resoperation import rop
 from rpython.jit.backend.x86 import support
 from rpython.rlib.debug import debug_print, debug_start, debug_stop
 from rpython.rlib import rgc
-from rpython.rlib.clibffi import FFI_DEFAULT_ABI
 from rpython.jit.backend.x86.jump import remap_frame_layout
 from rpython.jit.codewriter.effectinfo import EffectInfo
 from rpython.jit.codewriter import longlong
@@ -38,15 +37,6 @@
 from rpython.rlib.objectmodel import compute_unique_id
 
 
-# darwin requires the stack to be 16 bytes aligned on calls. Same for gcc 4.5.0,
-# better safe than sorry
-CALL_ALIGN = 16 // WORD
-
-
-def align_stack_words(words):
-    return (words + CALL_ALIGN - 1) & ~(CALL_ALIGN-1)
-
-
 class Assembler386(BaseAssembler):
     _regalloc = None
     _output_loop_log = None
@@ -131,10 +121,10 @@
             mc.MOV_rs(esi.value, WORD*2)
             # push first arg
             mc.MOV_rr(edi.value, ebp.value)
-            align = align_stack_words(1)
+            align = callbuilder.align_stack_words(1)
             mc.SUB_ri(esp.value, (align - 1) * WORD)
         else:
-            align = align_stack_words(3)
+            align = callbuilder.align_stack_words(3)
             mc.MOV_rs(eax.value, WORD * 2)
             mc.SUB_ri(esp.value, (align - 1) * WORD)
             mc.MOV_sr(WORD, eax.value)
@@ -1014,175 +1004,9 @@
         gcrootmap = self.cpu.gc_ll_descr.gcrootmap
         return bool(gcrootmap) and not gcrootmap.is_shadow_stack
 
-    def _emit_call(self, x, arglocs, start=0, tmp=eax,
-                   argtypes=None, callconv=FFI_DEFAULT_ABI,
-                   # whether to worry about a CALL that can collect; this
-                   # is always true except in call_release_gil
-                   can_collect=True,
-                   # max number of arguments we can pass on esp; if more,
-                   # we need to decrease esp temporarily
-                   stack_max=PASS_ON_MY_FRAME):
-        #
-        if IS_X86_64:
-            return self._emit_call_64(x, arglocs, start, argtypes,
-                                      can_collect, stack_max)
-        stack_depth = 0
-        n = len(arglocs)
-        for i in range(start, n):
-            loc = arglocs[i]
-            stack_depth += loc.get_width() // WORD
-        if stack_depth > stack_max:
-            align = align_stack_words(stack_depth - stack_max)
-            self.mc.SUB_ri(esp.value, align * WORD)
-            if can_collect:
-                self.set_extra_stack_depth(self.mc, align * WORD)
-        else:
-            align = 0
-        p = 0
-        for i in range(start, n):
-            loc = arglocs[i]
-            if isinstance(loc, RegLoc):
-                if loc.is_xmm:
-                    self.mc.MOVSD_sx(p, loc.value)
-                else:
-                    self.mc.MOV_sr(p, loc.value)
-            p += loc.get_width()
-        p = 0
-        for i in range(start, n):
-            loc = arglocs[i]
-            if not isinstance(loc, RegLoc):
-                if loc.get_width() == 8:
-                    self.mc.MOVSD(xmm0, loc)
-                    self.mc.MOVSD_sx(p, xmm0.value)
-                else:
-                    self.mc.MOV(tmp, loc)
-                    self.mc.MOV_sr(p, tmp.value)
-            p += loc.get_width()
-        # x is a location
-        if can_collect:
-            # we push *now* the gcmap, describing the status of GC registers
-            # after the rearrangements done just above, ignoring the return
-            # value eax, if necessary
-            noregs = self.cpu.gc_ll_descr.is_shadow_stack()
-            gcmap = self._regalloc.get_gcmap([eax], noregs=noregs)
-            self.push_gcmap(self.mc, gcmap, store=True)
-        #
-        self.mc.CALL(x)
-        if callconv != FFI_DEFAULT_ABI:
-            self._fix_stdcall(callconv, p - align * WORD)
-        elif align:
-            self.mc.ADD_ri(esp.value, align * WORD)
-        #
-        if can_collect:
-            self._reload_frame_if_necessary(self.mc)
-            if align:
-                self.set_extra_stack_depth(self.mc, 0)
-            self.pop_gcmap(self.mc)
-
-    def _fix_stdcall(self, callconv, p):
-        from rpython.rlib.clibffi import FFI_STDCALL
-        assert callconv == FFI_STDCALL
-        # it's a bit stupid, but we're just going to cancel the fact that
-        # the called function just added 'p' to ESP, by subtracting it again.
-        self.mc.SUB_ri(esp.value, p)
-
-    def _emit_call_64(self, x, arglocs, start, argtypes,
-                      can_collect, stack_max):
-        src_locs = []
-        dst_locs = []
-        xmm_src_locs = []
-        xmm_dst_locs = []
-        singlefloats = None
-
-        # In reverse order for use with pop()
-        unused_gpr = [r9, r8, ecx, edx, esi, edi]
-        unused_xmm = [xmm7, xmm6, xmm5, xmm4, xmm3, xmm2, xmm1, xmm0]
-
-        on_stack = 0
-        # count the stack depth
-        floats = 0
-        for i in range(start, len(arglocs)):
-            arg = arglocs[i]
-            if arg.is_float() or argtypes and argtypes[i - start] == 'S':
-                floats += 1
-        all_args = len(arglocs) - start
-        stack_depth = (max(all_args - floats - len(unused_gpr), 0) +
-                       max(floats - len(unused_xmm), 0))
-        align = 0
-        if stack_depth > stack_max:
-            align = align_stack_words(stack_depth - stack_max)
-            if can_collect:
-                self.set_extra_stack_depth(self.mc, align * WORD)
-            self.mc.SUB_ri(esp.value, align * WORD)
-        for i in range(start, len(arglocs)):
-            loc = arglocs[i]
-            if loc.is_float():
-                xmm_src_locs.append(loc)
-                if len(unused_xmm) > 0:
-                    xmm_dst_locs.append(unused_xmm.pop())
-                else:
-                    xmm_dst_locs.append(RawEspLoc(on_stack * WORD, FLOAT))
-                    on_stack += 1
-            elif argtypes is not None and argtypes[i-start] == 'S':
-                # Singlefloat argument
-                if singlefloats is None:
-                    singlefloats = []
-                if len(unused_xmm) > 0:
-                    singlefloats.append((loc, unused_xmm.pop()))
-                else:
-                    singlefloats.append((loc, RawEspLoc(on_stack * WORD, INT)))
-                    on_stack += 1
-            else:
-                src_locs.append(loc)
-                if len(unused_gpr) > 0:
-                    dst_locs.append(unused_gpr.pop())
-                else:
-                    dst_locs.append(RawEspLoc(on_stack * WORD, INT))
-                    on_stack += 1
-
-        # Handle register arguments: first remap the xmm arguments
-        remap_frame_layout(self, xmm_src_locs, xmm_dst_locs,
-                           X86_64_XMM_SCRATCH_REG)
-        # Load the singlefloat arguments from main regs or stack to xmm regs
-        if singlefloats is not None:
-            for src, dst in singlefloats:
-                if isinstance(dst, RawEspLoc):
-                    # XXX too much special logic
-                    if isinstance(src, RawEbpLoc):
-                        self.mc.MOV32(X86_64_SCRATCH_REG, src)
-                        self.mc.MOV32(dst, X86_64_SCRATCH_REG)
-                    else:
-                        self.mc.MOV32(dst, src)
-                    continue
-                if isinstance(src, ImmedLoc):
-                    self.mc.MOV(X86_64_SCRATCH_REG, src)
-                    src = X86_64_SCRATCH_REG
-                self.mc.MOVD(dst, src)
-        # Finally remap the arguments in the main regs
-        # If x is a register and is in dst_locs, then oups, it needs to
-        # be moved away:
-        if x in dst_locs:
-            src_locs.append(x)
-            dst_locs.append(r10)
-            x = r10
-        remap_frame_layout(self, src_locs, dst_locs, X86_64_SCRATCH_REG)
-        if can_collect:
-            # we push *now* the gcmap, describing the status of GC registers
-            # after the rearrangements done just above, ignoring the return
-            # value eax, if necessary
-            noregs = self.cpu.gc_ll_descr.is_shadow_stack()
-            gcmap = self._regalloc.get_gcmap([eax], noregs=noregs)
-            self.push_gcmap(self.mc, gcmap, store=True)
-        #
-        self.mc.CALL(x)
-        if align:
-            self.mc.ADD_ri(esp.value, align * WORD)
-        #
-        if can_collect:
-            self._reload_frame_if_necessary(self.mc)
-            if align:
-                self.set_extra_stack_depth(self.mc, 0)
-            self.pop_gcmap(self.mc)
+    def simple_call(self, fnaddr, arglocs):
+        cb = callbuilder.CallBuilder(self, imm(fnaddr), arglocs)
+        cb.emit()
 
     def _reload_frame_if_necessary(self, mc, align_stack=False):
         gcrootmap = self.cpu.gc_ll_descr.gcrootmap
@@ -1198,10 +1022,6 @@
             self._write_barrier_fastpath(mc, wbdescr, [ebp], array=False,
                                          is_frame=True, align_stack=align_stack)
 
-    def call(self, addr, args, res):
-        self._emit_call(imm(addr), args)
-        assert res is eax
-
     genop_int_neg = _unaryop("NEG")
     genop_int_invert = _unaryop("NOT")
     genop_int_add = _binaryop_or_lea("ADD", True)
@@ -2003,29 +1823,20 @@
         size = sizeloc.value
         signloc = arglocs[1]
 
-        x = arglocs[2]     # the function address
-        if x is eax:
-            tmp = ecx
-        else:
-            tmp = eax
+        cb = callbuilder.CallBuilder(self, arglocs[2], arglocs[3:])
 
         descr = op.getdescr()
         assert isinstance(descr, CallDescr)
+        cb.argtypes = descr.get_arg_types()
+        cb.callconv = descr.get_call_conv()
 
-        stack_max = PASS_ON_MY_FRAME
         if is_call_release_gil:
             if self._is_asmgcc():
                 from rpython.memory.gctransform import asmgcroot
                 stack_max -= asmgcroot.JIT_USE_WORDS
-            can_collect = False
-        else:
-            can_collect = True
+            XXXXXX
 
-        self._emit_call(x, arglocs, 3, tmp=tmp,
-                        argtypes=descr.get_arg_types(),
-                        callconv=descr.get_call_conv(),
-                        can_collect=can_collect,
-                        stack_max=stack_max)
+        cb.emit()
 
         if IS_X86_32 and isinstance(resloc, FrameLoc) and resloc.type == FLOAT:
             # a float or a long long return
diff --git a/rpython/jit/backend/x86/callbuilder.py b/rpython/jit/backend/x86/callbuilder.py
new file mode 100644
--- /dev/null
+++ b/rpython/jit/backend/x86/callbuilder.py
@@ -0,0 +1,224 @@
+from rpython.rlib.clibffi import FFI_DEFAULT_ABI
+from rpython.jit.backend.x86.arch import (WORD, IS_X86_64, IS_X86_32,
+                                          PASS_ON_MY_FRAME)
+from rpython.jit.backend.x86.regloc import (eax, ecx, edx, ebx, esp, ebp, esi,
+    xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, r8, r9, r10, r11, edi,
+    r12, r13, r14, r15, X86_64_SCRATCH_REG, X86_64_XMM_SCRATCH_REG,
+    RegLoc)
+
+
+# darwin requires the stack to be 16 bytes aligned on calls.
+# Same for gcc 4.5.0, better safe than sorry
+CALL_ALIGN = 16 // WORD
+
+def align_stack_words(words):
+    return (words + CALL_ALIGN - 1) & ~(CALL_ALIGN-1)
+
+
+
+class AbstractCallBuilder(object):
+
+    # max number of words we have room in esp; if we need more for
+    # arguments, we need to decrease esp temporarily
+    stack_max = PASS_ON_MY_FRAME
+
+    # this can be set to guide more complex calls: gives the detailed
+    # type of the arguments
+    argtypes = None
+
+    # this is the calling convention (can be FFI_STDCALL on Windows)
+    callconv = FFI_DEFAULT_ABI
+
+    # if False, we also push the gcmap
+    is_call_release_gil = False
+
+
+    def __init__(self, assembler, fnloc, arglocs):
+        self.asm = assembler
+        self.mc = assembler.mc
+        self.fnloc = fnloc
+        self.arglocs = arglocs
+        self.current_esp = 0
+
+    def emit(self):
+        """Emit a regular call; not for CALL_RELEASE_GIL."""
+        self.prepare_arguments()
+        self.push_gcmap()
+        self.emit_raw_call()
+        self.pop_gcmap()
+        self.restore_esp()
+
+    def emit_raw_call(self):
+        self.mc.CALL(self.fnloc)
+        if self.callconv != FFI_DEFAULT_ABI:
+            self.current_esp += self._fix_stdcall(self.callconv)
+
+    def restore_esp(self):
+        if self.current_esp != 0:
+            self.mc.SUB_ri(esp.value, self.current_esp)
+            self.current_esp = 0
+
+    def push_gcmap(self):
+        # we push *now* the gcmap, describing the status of GC registers
+        # after the rearrangements done just above, ignoring the return
+        # value eax, if necessary
+        assert not self.is_call_release_gil
+        self.change_extra_stack_depth = (self.current_esp != 0)
+        if self.change_extra_stack_depth:
+            self.asm.set_extra_stack_depth(self.mc, -self.current_esp)
+        noregs = self.asm.cpu.gc_ll_descr.is_shadow_stack()
+        gcmap = self.asm._regalloc.get_gcmap([eax], noregs=noregs)
+        self.asm.push_gcmap(self.mc, gcmap, store=True)
+
+    def pop_gcmap(self):
+        assert not self.is_call_release_gil
+        self.asm._reload_frame_if_necessary(self.mc)
+        if self.change_extra_stack_depth:
+            self.asm.set_extra_stack_depth(self.mc, 0)
+        self.asm.pop_gcmap(self.mc)
+
+
+class CallBuilder32(AbstractCallBuilder):
+
+    def prepare_arguments(self):
+        arglocs = self.arglocs
+        stack_depth = 0
+        n = len(arglocs)
+        for i in range(n):
+            loc = arglocs[i]
+            stack_depth += loc.get_width() // WORD
+        if stack_depth > self.stack_max:
+            align = align_stack_words(stack_depth - self.stack_max)
+            self.current_esp -= align * WORD
+            self.mc.SUB_ri(esp.value, align * WORD)
+        #
+        p = 0
+        for i in range(n):
+            loc = arglocs[i]
+            if isinstance(loc, RegLoc):
+                if loc.is_xmm:
+                    self.mc.MOVSD_sx(p, loc.value)
+                else:
+                    self.mc.MOV_sr(p, loc.value)
+            p += loc.get_width()
+        p = 0
+        for i in range(n):
+            loc = arglocs[i]
+            if not isinstance(loc, RegLoc):
+                if loc.get_width() == 8:
+                    self.mc.MOVSD(xmm0, loc)
+                    self.mc.MOVSD_sx(p, xmm0.value)
+                else:
+                    if self.fnloc is eax:
+                        tmp = ecx
+                    else:
+                        tmp = eax
+                    self.mc.MOV(tmp, loc)
+                    self.mc.MOV_sr(p, tmp.value)
+            p += loc.get_width()
+        self.total_stack_used_by_arguments = p
+
+
+    def _fix_stdcall(self, callconv):
+        from rpython.rlib.clibffi import FFI_STDCALL
+        assert callconv == FFI_STDCALL
+        return self.total_stack_used_by_arguments
+
+
+
+class CallBuilder64(AbstractCallBuilder):
+
+    # In reverse order for use with pop()
+    unused_gpr = [r9, r8, ecx, edx, esi, edi]
+    unused_xmm = [xmm7, xmm6, xmm5, xmm4, xmm3, xmm2, xmm1, xmm0]
+
+    def prepare_arguments(self):
+        src_locs = []
+        dst_locs = []
+        xmm_src_locs = []
+        xmm_dst_locs = []
+        singlefloats = None
+
+        unused_grp = self.unused_grp[:]
+        unused_xmm = self.unused_xmm[:]
+
+        on_stack = 0
+        for i in range(len(arglocs)):
+            loc = arglocs[i]
+            if loc.is_float():
+                xmm_src_locs.append(loc)
+                if len(unused_xmm) > 0:
+                    xmm_dst_locs.append(unused_xmm.pop())
+                else:
+                    xmm_dst_locs.append(RawEspLoc(on_stack * WORD, FLOAT))
+                    on_stack += 1
+            elif argtypes is not None and argtypes[i] == 'S':
+                # Singlefloat argument
+                if singlefloats is None:
+                    singlefloats = []
+                if len(unused_xmm) > 0:
+                    singlefloats.append((loc, unused_xmm.pop()))
+                else:
+                    singlefloats.append((loc, RawEspLoc(on_stack * WORD, INT)))
+                    on_stack += 1
+            else:
+                src_locs.append(loc)
+                if len(unused_gpr) > 0:
+                    dst_locs.append(unused_gpr.pop())
+                else:
+                    dst_locs.append(RawEspLoc(on_stack * WORD, INT))
+                    on_stack += 1
+
+        if not we_are_translated():  # assert that we got the right stack depth
+            floats = 0
+            for i in range(len(arglocs)):
+                arg = arglocs[i]
+                if arg.is_float() or argtypes and argtypes[i] == 'S':
+                    floats += 1
+            all_args = len(arglocs)
+            stack_depth = (max(all_args - floats - len(unused_gpr), 0) +
+                           max(floats - len(unused_xmm), 0))
+            assert stack_depth == on_stack
+
+        align = 0
+        if on_stack > stack_max:
+            align = align_stack_words(on_stack - stack_max)
+            self.current_esp -= align * WORD
+            self.mc.SUB_ri(esp.value, align * WORD)
+
+        # Handle register arguments: first remap the xmm arguments
+        remap_frame_layout(self, xmm_src_locs, xmm_dst_locs,
+                           X86_64_XMM_SCRATCH_REG)
+        # Load the singlefloat arguments from main regs or stack to xmm regs
+        if singlefloats is not None:
+            for src, dst in singlefloats:
+                if isinstance(dst, RawEspLoc):
+                    # XXX too much special logic
+                    if isinstance(src, RawEbpLoc):
+                        self.mc.MOV32(X86_64_SCRATCH_REG, src)
+                        self.mc.MOV32(dst, X86_64_SCRATCH_REG)
+                    else:
+                        self.mc.MOV32(dst, src)
+                    continue
+                if isinstance(src, ImmedLoc):
+                    self.mc.MOV(X86_64_SCRATCH_REG, src)
+                    src = X86_64_SCRATCH_REG
+                self.mc.MOVD(dst, src)
+        # Finally remap the arguments in the main regs
+        # If x is a register and is in dst_locs, then oups, it needs to
+        # be moved away:
+        if self.fnloc in dst_locs:
+            src_locs.append(self.fnloc)
+            dst_locs.append(r10)
+            self.fnloc = r10
+        remap_frame_layout(self, src_locs, dst_locs, X86_64_SCRATCH_REG)
+
+
+    def _fix_stdcall(self, callconv):
+        assert 0     # should not occur on 64-bit
+
+
+if IS_X86_32:
+    CallBuilder = CallBuilder32
+if IS_X86_64:
+    CallBuilder = CallBuilder64
diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py
--- a/rpython/jit/backend/x86/regalloc.py
+++ b/rpython/jit/backend/x86/regalloc.py
@@ -799,14 +799,6 @@
         self._consider_call(op, guard_op)
 
     def consider_call_release_gil(self, op, guard_op):
-        # We spill the arguments to the stack, because we need to do 3 calls:
-        # call_release_gil(), the_real_c_function(), and call_reacquire_gil().
-        # The arguments are used on the second call only.  XXX we assume
-        # that the XMM arguments won't be modified by call_release_gil().
-        for i in range(op.numargs()):
-            loc = self.loc(op.getarg(i))
-            if loc in self.rm.save_around_call_regs:
-                self.rm.force_spill_var(op.getarg(i))
         assert guard_op is not None
         self._consider_call(op, guard_op)
 
_______________________________________________
pypy-commit mailing list
[email protected]
http://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to