Author: Armin Rigo <ar...@tunes.org>
Branch: 
Changeset: r63460:f61197c64f0c
Date: 2013-04-17 21:33 +0200
http://bitbucket.org/pypy/pypy/changeset/f61197c64f0c/

Log:    Phew. Rewrite carefully to generate less code. I hope it was
        careful enough (the tests are happy at least).

diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -160,16 +160,26 @@
         self._frame_realloc_slowpath = mc.materialize(self.cpu.asmmemmgr, [])
 
     def _build_malloc_slowpath(self, kind):
-        """ While arriving on slowpath, we have a gcpattern on stack,
-        nursery_head in eax and the size in edi - eax
+        """ While arriving on slowpath, we have a gcpattern on stack 0.
+        The arguments are passed in eax and edi, as follows:
+
+        kind == 'fixed': nursery_head in eax and the size in edi - eax.
+
+        kind == 'str/unicode': length of the string to allocate in edi.
+
+        kind == 'var': length to allocate in edi, tid in eax,
+                       and itemsize in the stack 1 (position esp+WORD).
+
+        This function must preserve all registers apart from eax and edi.
         """
         assert kind in ['fixed', 'str', 'unicode', 'var']
         mc = codebuf.MachineCodeBlockWrapper()
         self._push_all_regs_to_frame(mc, [eax, edi], self.cpu.supports_floats)
+        # store the gc pattern
         ofs = self.cpu.get_ofs_of_frame_field('jf_gcmap')
-        # store the gc pattern
         mc.MOV_rs(ecx.value, WORD)
         mc.MOV_br(ofs, ecx.value)
+        #
         if kind == 'fixed':
             addr = self.cpu.gc_ll_descr.get_malloc_slowpath_addr()
         elif kind == 'str':
@@ -192,26 +202,22 @@
                 mc.MOV_rr(esi.value, ebp.value)
         elif kind == 'str' or kind == 'unicode':
             if IS_X86_32:
-                # stack layout: [---][---][---][ret][gcmap][length]...
-                mc.MOV_rs(edi.value, WORD * 5)  # pick 'length'
-                mc.MOV_sr(0, edi.value)
+                # stack layout: [---][---][---][ret].. with 3 free stack places
+                mc.MOV_sr(0, edi.value)     # store the length
             else:
-                # stack layout: [---][ret][gcmap][length]...
-                mc.MOV_rs(edi.value, WORD * 3)
+                pass                        # length already in edi
         else:
             if IS_X86_32:
-                # stack layout: [--][--][--][ret][gcmap][itemsize][length][tid]
-                mc.MOV_rs(edi.value, WORD * 5)  # pick 'itemsize'
-                mc.MOV_sr(0, edi.value)
-                mc.MOV_rs(edi.value, WORD * 7)  # pick 'tid'
-                mc.MOV_sr(WORD, edi.value)
-                mc.MOV_rs(edi.value, WORD * 6)  # pick 'length'
-                mc.MOV_sr(2 * WORD, edi.value)
+                # stack layout: [---][---][---][ret][gcmap][itemsize]...
+                mc.MOV_sr(WORD * 2, edi.value)  # store the length
+                mc.MOV_sr(WORD * 1, eax.value)  # store the tid
+                mc.MOV_rs(edi.value, WORD * 5)  # load the itemsize
+                mc.MOV_sr(WORD * 0, edi.value)  # store the itemsize
             else:
-                # stack layout: [---][ret][gcmap][itemsize][length][tid]...
-                mc.MOV_rs(edi.value, WORD * 3) # itemsize
-                mc.MOV_rs(esi.value, WORD * 5) # tid
-                mc.MOV_rs(edx.value, WORD * 4) # length
+                # stack layout: [---][ret][gcmap][itemsize]...
+                mc.MOV_rr(edx.value, edi.value) # length
+                mc.MOV_rr(esi.value, eax.value) # tid
+                mc.MOV_rs(edi.value, WORD * 3)  # load the itemsize
         self.set_extra_stack_depth(mc, 16)
         mc.CALL(imm(addr))
         mc.ADD_ri(esp.value, 16 - WORD)
@@ -2396,29 +2402,50 @@
         from rpython.jit.backend.llsupport.descr import ArrayDescr
         assert isinstance(arraydescr, ArrayDescr)
 
-        self.mc.CMP(lengthloc, imm(maxlength))
+        # lengthloc is the length of the array, which we must not modify!
+        assert lengthloc is not eax and lengthloc is not edi
+        if isinstance(lengthloc, RegLoc):
+            varsizeloc = lengthloc
+        else:
+            self.mc.MOV(edi, lengthloc)
+            varsizeloc = edi
+
+        self.mc.CMP(varsizeloc, imm(maxlength))
         self.mc.J_il8(rx86.Conditions['A'], 0) # patched later
         jmp_adr0 = self.mc.get_relative_pos()
+
         self.mc.MOV(eax, heap(nursery_free_adr))
-        self.mc.MOV(edi, lengthloc)
+        shift = size2shift(itemsize)
+        if shift < 0:
+            self.mc.IMUL_rri(edi.value, varsizeloc.value, itemsize)
+            varsizeloc = edi
+            shift = 0
+        # now varsizeloc is a register != eax.  The size of
+        # the variable part of the array is (varsizeloc << shift)
         assert arraydescr.basesize >= self.gc_minimal_size_in_nursery
-        self.mc.IMUL_ri(edi.value, itemsize)
-        header_size = self.gc_size_of_header
-        self.mc.ADD_ri(edi.value, arraydescr.basesize + header_size + WORD - 1)
-        self.mc.AND_ri(edi.value, ~(WORD - 1))
-        self.mc.ADD(edi, heap(nursery_free_adr))
+        constsize = arraydescr.basesize + self.gc_size_of_header
+        force_realignment = (itemsize % WORD) != 0
+        if force_realignment:
+            constsize += WORD - 1
+        self.mc.LEA_ra(edi.value, (eax.value, varsizeloc.value, shift,
+                                   constsize))
+        if force_realignment:
+            self.mc.AND_ri(edi.value, ~(WORD - 1))
+        # now edi contains the total size in bytes, rounded up to a multiple
+        # of WORD, plus nursery_free_adr
         self.mc.CMP(edi, heap(nursery_top_adr))
-        # write down the tid
-        self.mc.MOV(mem(eax, 0), imm(arraydescr.tid))
         self.mc.J_il8(rx86.Conditions['NA'], 0) # patched later
         jmp_adr1 = self.mc.get_relative_pos()
+        #
         offset = self.mc.get_relative_pos() - jmp_adr0
         assert 0 < offset <= 127
         self.mc.overwrite(jmp_adr0-1, chr(offset))
+        # save the gcmap
+        self.push_gcmap(self.mc, gcmap, mov=True)   # mov into RawEspLoc(0)
         if kind == rewrite.FLAG_ARRAY:
             self.mc.MOV_si(WORD, itemsize)
-            self.mc.MOV(RawEspLoc(WORD * 2, INT), lengthloc)
-            self.mc.MOV_si(WORD * 3, arraydescr.tid)
+            self.mc.MOV(edi, lengthloc)
+            self.mc.MOV_ri(eax.value, arraydescr.tid)
             addr = self.malloc_slowpath_varsize
         else:
             if kind == rewrite.FLAG_STR:
@@ -2426,14 +2453,22 @@
             else:
                 assert kind == rewrite.FLAG_UNICODE
                 addr = self.malloc_slowpath_unicode
-            self.mc.MOV(RawEspLoc(WORD, INT), lengthloc)
-        # save the gcmap
-        self.push_gcmap(self.mc, gcmap, mov=True)   # mov into RawEspLoc(0)
+            self.mc.MOV(edi, lengthloc)
         self.mc.CALL(imm(addr))
+        self.mc.JMP_l8(0)      # jump to done, patched later
+        jmp_location = self.mc.get_relative_pos()
+        #
         offset = self.mc.get_relative_pos() - jmp_adr1
         assert 0 < offset <= 127
         self.mc.overwrite(jmp_adr1-1, chr(offset))
+        # write down the tid, but not if it's the result of the CALL
+        self.mc.MOV(mem(eax, 0), imm(arraydescr.tid))
+        # while we're at it, this line is not needed if we've done the CALL
         self.mc.MOV(heap(nursery_free_adr), edi)
+        #
+        offset = self.mc.get_relative_pos() - jmp_location
+        assert 0 < offset <= 127
+        self.mc.overwrite(jmp_location - 1, chr(offset))
 
     def force_token(self, reg):
         # XXX kill me
@@ -2488,5 +2523,13 @@
     os.write(2, '[x86/asm] %s\n' % msg)
     raise NotImplementedError(msg)
 
+def size2shift(size):
+    "Return a result 0..3 such that (1<<result) == size, or -1 if impossible"
+    if size == 1: return 0
+    if size == 2: return 1
+    if size == 4: return 2
+    if size == 8: return 3
+    return -1
+
 class BridgeAlreadyCompiled(Exception):
     pass
diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py
--- a/rpython/jit/backend/x86/regalloc.py
+++ b/rpython/jit/backend/x86/regalloc.py
@@ -876,19 +876,21 @@
         if not hasattr(gc_ll_descr, 'max_size_of_young_obj'):
             raise Exception("unreachable code")
             # for boehm, this function should never be called
+        arraydescr = op.getdescr()
         length_box = op.getarg(2)
-        arraydescr = op.getdescr()
         assert isinstance(length_box, BoxInt) # we cannot have a const here!
-        # looking at the result
+        # the result will be in eax
         self.rm.force_allocate_reg(op.result, selected_reg=eax)
-        #
-        # We need edx as a temporary, but otherwise don't save any more
-        # register.  See comments in _build_malloc_slowpath().
+        # we need edi as a temporary
         tmp_box = TempBox()
         self.rm.force_allocate_reg(tmp_box, selected_reg=edi)
-        lengthloc = self.rm.make_sure_var_in_reg(length_box, [op.result, tmp_box])
         gcmap = self.get_gcmap([eax, edi]) # allocate the gcmap *before*
         self.rm.possibly_free_var(tmp_box)
+        # length_box always survives: it's typically also present in the
+        # next operation that will copy it inside the new array.  It's
+        # fine to load it from the stack too, as long as it's != eax, edi.
+        lengthloc = self.rm.loc(length_box)
+        self.rm.possibly_free_var(length_box)
         #
         itemsize = op.getarg(1).getint()
         maxlength = (gc_ll_descr.max_size_of_young_obj - WORD * 2) / itemsize
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
http://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to