Author: Armin Rigo <[email protected]>
Branch:
Changeset: r63460:f61197c64f0c
Date: 2013-04-17 21:33 +0200
http://bitbucket.org/pypy/pypy/changeset/f61197c64f0c/
Log: Phew. Rewrite carefully to generate less code. I hope it was
careful enough (the tests are happy at least).
diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -160,16 +160,26 @@
self._frame_realloc_slowpath = mc.materialize(self.cpu.asmmemmgr, [])
def _build_malloc_slowpath(self, kind):
- """ While arriving on slowpath, we have a gcpattern on stack,
- nursery_head in eax and the size in edi - eax
+ """ While arriving on slowpath, we have a gcpattern on stack 0.
+ The arguments are passed in eax and edi, as follows:
+
+ kind == 'fixed': nursery_head in eax and the size in edi - eax.
+
+ kind == 'str/unicode': length of the string to allocate in edi.
+
+ kind == 'var': length to allocate in edi, tid in eax,
+ and itemsize in the stack 1 (position esp+WORD).
+
+ This function must preserve all registers apart from eax and edi.
"""
assert kind in ['fixed', 'str', 'unicode', 'var']
mc = codebuf.MachineCodeBlockWrapper()
self._push_all_regs_to_frame(mc, [eax, edi], self.cpu.supports_floats)
+ # store the gc pattern
ofs = self.cpu.get_ofs_of_frame_field('jf_gcmap')
- # store the gc pattern
mc.MOV_rs(ecx.value, WORD)
mc.MOV_br(ofs, ecx.value)
+ #
if kind == 'fixed':
addr = self.cpu.gc_ll_descr.get_malloc_slowpath_addr()
elif kind == 'str':
@@ -192,26 +202,22 @@
mc.MOV_rr(esi.value, ebp.value)
elif kind == 'str' or kind == 'unicode':
if IS_X86_32:
- # stack layout: [---][---][---][ret][gcmap][length]...
- mc.MOV_rs(edi.value, WORD * 5) # pick 'length'
- mc.MOV_sr(0, edi.value)
+ # stack layout: [---][---][---][ret].. with 3 free stack places
+ mc.MOV_sr(0, edi.value) # store the length
else:
- # stack layout: [---][ret][gcmap][length]...
- mc.MOV_rs(edi.value, WORD * 3)
+ pass # length already in edi
else:
if IS_X86_32:
- # stack layout: [--][--][--][ret][gcmap][itemsize][length][tid]
- mc.MOV_rs(edi.value, WORD * 5) # pick 'itemsize'
- mc.MOV_sr(0, edi.value)
- mc.MOV_rs(edi.value, WORD * 7) # pick 'tid'
- mc.MOV_sr(WORD, edi.value)
- mc.MOV_rs(edi.value, WORD * 6) # pick 'length'
- mc.MOV_sr(2 * WORD, edi.value)
+ # stack layout: [---][---][---][ret][gcmap][itemsize]...
+ mc.MOV_sr(WORD * 2, edi.value) # store the length
+ mc.MOV_sr(WORD * 1, eax.value) # store the tid
+ mc.MOV_rs(edi.value, WORD * 5) # load the itemsize
+ mc.MOV_sr(WORD * 0, edi.value) # store the itemsize
else:
- # stack layout: [---][ret][gcmap][itemsize][length][tid]...
- mc.MOV_rs(edi.value, WORD * 3) # itemsize
- mc.MOV_rs(esi.value, WORD * 5) # tid
- mc.MOV_rs(edx.value, WORD * 4) # length
+ # stack layout: [---][ret][gcmap][itemsize]...
+ mc.MOV_rr(edx.value, edi.value) # length
+ mc.MOV_rr(esi.value, eax.value) # tid
+ mc.MOV_rs(edi.value, WORD * 3) # load the itemsize
self.set_extra_stack_depth(mc, 16)
mc.CALL(imm(addr))
mc.ADD_ri(esp.value, 16 - WORD)
@@ -2396,29 +2402,50 @@
from rpython.jit.backend.llsupport.descr import ArrayDescr
assert isinstance(arraydescr, ArrayDescr)
- self.mc.CMP(lengthloc, imm(maxlength))
+ # lengthloc is the length of the array, which we must not modify!
+ assert lengthloc is not eax and lengthloc is not edi
+ if isinstance(lengthloc, RegLoc):
+ varsizeloc = lengthloc
+ else:
+ self.mc.MOV(edi, lengthloc)
+ varsizeloc = edi
+
+ self.mc.CMP(varsizeloc, imm(maxlength))
self.mc.J_il8(rx86.Conditions['A'], 0) # patched later
jmp_adr0 = self.mc.get_relative_pos()
+
self.mc.MOV(eax, heap(nursery_free_adr))
- self.mc.MOV(edi, lengthloc)
+ shift = size2shift(itemsize)
+ if shift < 0:
+ self.mc.IMUL_rri(edi.value, varsizeloc.value, itemsize)
+ varsizeloc = edi
+ shift = 0
+ # now varsizeloc is a register != eax. The size of
+ # the variable part of the array is (varsizeloc << shift)
assert arraydescr.basesize >= self.gc_minimal_size_in_nursery
- self.mc.IMUL_ri(edi.value, itemsize)
- header_size = self.gc_size_of_header
- self.mc.ADD_ri(edi.value, arraydescr.basesize + header_size + WORD - 1)
- self.mc.AND_ri(edi.value, ~(WORD - 1))
- self.mc.ADD(edi, heap(nursery_free_adr))
+ constsize = arraydescr.basesize + self.gc_size_of_header
+ force_realignment = (itemsize % WORD) != 0
+ if force_realignment:
+ constsize += WORD - 1
+ self.mc.LEA_ra(edi.value, (eax.value, varsizeloc.value, shift,
+ constsize))
+ if force_realignment:
+ self.mc.AND_ri(edi.value, ~(WORD - 1))
+ # now edi contains the total size in bytes, rounded up to a multiple
+ # of WORD, plus nursery_free_adr
self.mc.CMP(edi, heap(nursery_top_adr))
- # write down the tid
- self.mc.MOV(mem(eax, 0), imm(arraydescr.tid))
self.mc.J_il8(rx86.Conditions['NA'], 0) # patched later
jmp_adr1 = self.mc.get_relative_pos()
+ #
offset = self.mc.get_relative_pos() - jmp_adr0
assert 0 < offset <= 127
self.mc.overwrite(jmp_adr0-1, chr(offset))
+ # save the gcmap
+ self.push_gcmap(self.mc, gcmap, mov=True) # mov into RawEspLoc(0)
if kind == rewrite.FLAG_ARRAY:
self.mc.MOV_si(WORD, itemsize)
- self.mc.MOV(RawEspLoc(WORD * 2, INT), lengthloc)
- self.mc.MOV_si(WORD * 3, arraydescr.tid)
+ self.mc.MOV(edi, lengthloc)
+ self.mc.MOV_ri(eax.value, arraydescr.tid)
addr = self.malloc_slowpath_varsize
else:
if kind == rewrite.FLAG_STR:
@@ -2426,14 +2453,22 @@
else:
assert kind == rewrite.FLAG_UNICODE
addr = self.malloc_slowpath_unicode
- self.mc.MOV(RawEspLoc(WORD, INT), lengthloc)
- # save the gcmap
- self.push_gcmap(self.mc, gcmap, mov=True) # mov into RawEspLoc(0)
+ self.mc.MOV(edi, lengthloc)
self.mc.CALL(imm(addr))
+ self.mc.JMP_l8(0) # jump to done, patched later
+ jmp_location = self.mc.get_relative_pos()
+ #
offset = self.mc.get_relative_pos() - jmp_adr1
assert 0 < offset <= 127
self.mc.overwrite(jmp_adr1-1, chr(offset))
+ # write down the tid, but not if it's the result of the CALL
+ self.mc.MOV(mem(eax, 0), imm(arraydescr.tid))
+ # while we're at it, this line is not needed if we've done the CALL
self.mc.MOV(heap(nursery_free_adr), edi)
+ #
+ offset = self.mc.get_relative_pos() - jmp_location
+ assert 0 < offset <= 127
+ self.mc.overwrite(jmp_location - 1, chr(offset))
def force_token(self, reg):
# XXX kill me
@@ -2488,5 +2523,13 @@
os.write(2, '[x86/asm] %s\n' % msg)
raise NotImplementedError(msg)
+def size2shift(size):
+ "Return a result 0..3 such that (1<<result) == size, or -1 if impossible"
+ if size == 1: return 0
+ if size == 2: return 1
+ if size == 4: return 2
+ if size == 8: return 3
+ return -1
+
class BridgeAlreadyCompiled(Exception):
pass
diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py
--- a/rpython/jit/backend/x86/regalloc.py
+++ b/rpython/jit/backend/x86/regalloc.py
@@ -876,19 +876,21 @@
if not hasattr(gc_ll_descr, 'max_size_of_young_obj'):
raise Exception("unreachable code")
# for boehm, this function should never be called
+ arraydescr = op.getdescr()
length_box = op.getarg(2)
- arraydescr = op.getdescr()
assert isinstance(length_box, BoxInt) # we cannot have a const here!
- # looking at the result
+ # the result will be in eax
self.rm.force_allocate_reg(op.result, selected_reg=eax)
- #
- # We need edx as a temporary, but otherwise don't save any more
- # register. See comments in _build_malloc_slowpath().
+ # we need edi as a temporary
tmp_box = TempBox()
self.rm.force_allocate_reg(tmp_box, selected_reg=edi)
- lengthloc = self.rm.make_sure_var_in_reg(length_box, [op.result, tmp_box])
gcmap = self.get_gcmap([eax, edi]) # allocate the gcmap *before*
self.rm.possibly_free_var(tmp_box)
+ # length_box always survives: it's typically also present in the
+ # next operation that will copy it inside the new array. It's
+ # fine to load it from the stack too, as long as it's != eax, edi.
+ lengthloc = self.rm.loc(length_box)
+ self.rm.possibly_free_var(length_box)
#
itemsize = op.getarg(1).getint()
maxlength = (gc_ll_descr.max_size_of_young_obj - WORD * 2) / itemsize
_______________________________________________
pypy-commit mailing list
[email protected]
http://mail.python.org/mailman/listinfo/pypy-commit