Author: Armin Rigo <ar...@tunes.org> Branch: Changeset: r63460:f61197c64f0c Date: 2013-04-17 21:33 +0200 http://bitbucket.org/pypy/pypy/changeset/f61197c64f0c/
Log: Phew. Rewrite carefully to generate less code. I hope it was careful enough (the tests are happy at least). diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py --- a/rpython/jit/backend/x86/assembler.py +++ b/rpython/jit/backend/x86/assembler.py @@ -160,16 +160,26 @@ self._frame_realloc_slowpath = mc.materialize(self.cpu.asmmemmgr, []) def _build_malloc_slowpath(self, kind): - """ While arriving on slowpath, we have a gcpattern on stack, - nursery_head in eax and the size in edi - eax + """ While arriving on slowpath, we have a gcpattern on stack 0. + The arguments are passed in eax and edi, as follows: + + kind == 'fixed': nursery_head in eax and the size in edi - eax. + + kind == 'str/unicode': length of the string to allocate in edi. + + kind == 'var': length to allocate in edi, tid in eax, + and itemsize in the stack 1 (position esp+WORD). + + This function must preserve all registers apart from eax and edi. """ assert kind in ['fixed', 'str', 'unicode', 'var'] mc = codebuf.MachineCodeBlockWrapper() self._push_all_regs_to_frame(mc, [eax, edi], self.cpu.supports_floats) + # store the gc pattern ofs = self.cpu.get_ofs_of_frame_field('jf_gcmap') - # store the gc pattern mc.MOV_rs(ecx.value, WORD) mc.MOV_br(ofs, ecx.value) + # if kind == 'fixed': addr = self.cpu.gc_ll_descr.get_malloc_slowpath_addr() elif kind == 'str': @@ -192,26 +202,22 @@ mc.MOV_rr(esi.value, ebp.value) elif kind == 'str' or kind == 'unicode': if IS_X86_32: - # stack layout: [---][---][---][ret][gcmap][length]... - mc.MOV_rs(edi.value, WORD * 5) # pick 'length' - mc.MOV_sr(0, edi.value) + # stack layout: [---][---][---][ret].. with 3 free stack places + mc.MOV_sr(0, edi.value) # store the length else: - # stack layout: [---][ret][gcmap][length]... - mc.MOV_rs(edi.value, WORD * 3) + pass # length already in edi else: if IS_X86_32: - # stack layout: [--][--][--][ret][gcmap][itemsize][length][tid] - mc.MOV_rs(edi.value, WORD * 5) # pick 'itemsize' - mc.MOV_sr(0, edi.value) - mc.MOV_rs(edi.value, WORD * 7) # pick 'tid' - mc.MOV_sr(WORD, edi.value) - mc.MOV_rs(edi.value, WORD * 6) # pick 'length' - mc.MOV_sr(2 * WORD, edi.value) + # stack layout: [---][---][---][ret][gcmap][itemsize]... + mc.MOV_sr(WORD * 2, edi.value) # store the length + mc.MOV_sr(WORD * 1, eax.value) # store the tid + mc.MOV_rs(edi.value, WORD * 5) # load the itemsize + mc.MOV_sr(WORD * 0, edi.value) # store the itemsize else: - # stack layout: [---][ret][gcmap][itemsize][length][tid]... - mc.MOV_rs(edi.value, WORD * 3) # itemsize - mc.MOV_rs(esi.value, WORD * 5) # tid - mc.MOV_rs(edx.value, WORD * 4) # length + # stack layout: [---][ret][gcmap][itemsize]... + mc.MOV_rr(edx.value, edi.value) # length + mc.MOV_rr(esi.value, eax.value) # tid + mc.MOV_rs(edi.value, WORD * 3) # load the itemsize self.set_extra_stack_depth(mc, 16) mc.CALL(imm(addr)) mc.ADD_ri(esp.value, 16 - WORD) @@ -2396,29 +2402,50 @@ from rpython.jit.backend.llsupport.descr import ArrayDescr assert isinstance(arraydescr, ArrayDescr) - self.mc.CMP(lengthloc, imm(maxlength)) + # lengthloc is the length of the array, which we must not modify! + assert lengthloc is not eax and lengthloc is not edi + if isinstance(lengthloc, RegLoc): + varsizeloc = lengthloc + else: + self.mc.MOV(edi, lengthloc) + varsizeloc = edi + + self.mc.CMP(varsizeloc, imm(maxlength)) self.mc.J_il8(rx86.Conditions['A'], 0) # patched later jmp_adr0 = self.mc.get_relative_pos() + self.mc.MOV(eax, heap(nursery_free_adr)) - self.mc.MOV(edi, lengthloc) + shift = size2shift(itemsize) + if shift < 0: + self.mc.IMUL_rri(edi.value, varsizeloc.value, itemsize) + varsizeloc = edi + shift = 0 + # now varsizeloc is a register != eax. The size of + # the variable part of the array is (varsizeloc << shift) assert arraydescr.basesize >= self.gc_minimal_size_in_nursery - self.mc.IMUL_ri(edi.value, itemsize) - header_size = self.gc_size_of_header - self.mc.ADD_ri(edi.value, arraydescr.basesize + header_size + WORD - 1) - self.mc.AND_ri(edi.value, ~(WORD - 1)) - self.mc.ADD(edi, heap(nursery_free_adr)) + constsize = arraydescr.basesize + self.gc_size_of_header + force_realignment = (itemsize % WORD) != 0 + if force_realignment: + constsize += WORD - 1 + self.mc.LEA_ra(edi.value, (eax.value, varsizeloc.value, shift, + constsize)) + if force_realignment: + self.mc.AND_ri(edi.value, ~(WORD - 1)) + # now edi contains the total size in bytes, rounded up to a multiple + # of WORD, plus nursery_free_adr self.mc.CMP(edi, heap(nursery_top_adr)) - # write down the tid - self.mc.MOV(mem(eax, 0), imm(arraydescr.tid)) self.mc.J_il8(rx86.Conditions['NA'], 0) # patched later jmp_adr1 = self.mc.get_relative_pos() + # offset = self.mc.get_relative_pos() - jmp_adr0 assert 0 < offset <= 127 self.mc.overwrite(jmp_adr0-1, chr(offset)) + # save the gcmap + self.push_gcmap(self.mc, gcmap, mov=True) # mov into RawEspLoc(0) if kind == rewrite.FLAG_ARRAY: self.mc.MOV_si(WORD, itemsize) - self.mc.MOV(RawEspLoc(WORD * 2, INT), lengthloc) - self.mc.MOV_si(WORD * 3, arraydescr.tid) + self.mc.MOV(edi, lengthloc) + self.mc.MOV_ri(eax.value, arraydescr.tid) addr = self.malloc_slowpath_varsize else: if kind == rewrite.FLAG_STR: @@ -2426,14 +2453,22 @@ else: assert kind == rewrite.FLAG_UNICODE addr = self.malloc_slowpath_unicode - self.mc.MOV(RawEspLoc(WORD, INT), lengthloc) - # save the gcmap - self.push_gcmap(self.mc, gcmap, mov=True) # mov into RawEspLoc(0) + self.mc.MOV(edi, lengthloc) self.mc.CALL(imm(addr)) + self.mc.JMP_l8(0) # jump to done, patched later + jmp_location = self.mc.get_relative_pos() + # offset = self.mc.get_relative_pos() - jmp_adr1 assert 0 < offset <= 127 self.mc.overwrite(jmp_adr1-1, chr(offset)) + # write down the tid, but not if it's the result of the CALL + self.mc.MOV(mem(eax, 0), imm(arraydescr.tid)) + # while we're at it, this line is not needed if we've done the CALL self.mc.MOV(heap(nursery_free_adr), edi) + # + offset = self.mc.get_relative_pos() - jmp_location + assert 0 < offset <= 127 + self.mc.overwrite(jmp_location - 1, chr(offset)) def force_token(self, reg): # XXX kill me @@ -2488,5 +2523,13 @@ os.write(2, '[x86/asm] %s\n' % msg) raise NotImplementedError(msg) +def size2shift(size): + "Return a result 0..3 such that (1<<result) == size, or -1 if impossible" + if size == 1: return 0 + if size == 2: return 1 + if size == 4: return 2 + if size == 8: return 3 + return -1 + class BridgeAlreadyCompiled(Exception): pass diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py --- a/rpython/jit/backend/x86/regalloc.py +++ b/rpython/jit/backend/x86/regalloc.py @@ -876,19 +876,21 @@ if not hasattr(gc_ll_descr, 'max_size_of_young_obj'): raise Exception("unreachable code") # for boehm, this function should never be called + arraydescr = op.getdescr() length_box = op.getarg(2) - arraydescr = op.getdescr() assert isinstance(length_box, BoxInt) # we cannot have a const here! - # looking at the result + # the result will be in eax self.rm.force_allocate_reg(op.result, selected_reg=eax) - # - # We need edx as a temporary, but otherwise don't save any more - # register. See comments in _build_malloc_slowpath(). + # we need edi as a temporary tmp_box = TempBox() self.rm.force_allocate_reg(tmp_box, selected_reg=edi) - lengthloc = self.rm.make_sure_var_in_reg(length_box, [op.result, tmp_box]) gcmap = self.get_gcmap([eax, edi]) # allocate the gcmap *before* self.rm.possibly_free_var(tmp_box) + # length_box always survives: it's typically also present in the + # next operation that will copy it inside the new array. It's + # fine to load it from the stack too, as long as it's != eax, edi. + lengthloc = self.rm.loc(length_box) + self.rm.possibly_free_var(length_box) # itemsize = op.getarg(1).getint() maxlength = (gc_ll_descr.max_size_of_young_obj - WORD * 2) / itemsize _______________________________________________ pypy-commit mailing list pypy-commit@python.org http://mail.python.org/mailman/listinfo/pypy-commit