Author: Armin Rigo <ar...@tunes.org>
Branch:
Changeset: r73744:1083964c3070
Date: 2014-09-29 11:16 +0200
http://bitbucket.org/pypy/pypy/changeset/1083964c3070/
Log: Optimize ZERO_ARRAY(const) followed by some number of SETARRAYITEM_GCs. diff --git a/rpython/jit/backend/arm/opassembler.py b/rpython/jit/backend/arm/opassembler.py --- a/rpython/jit/backend/arm/opassembler.py +++ b/rpython/jit/backend/arm/opassembler.py @@ -1180,6 +1180,9 @@ def emit_op_zero_array(self, op, arglocs, regalloc, fcond): from rpython.jit.backend.llsupport.descr import unpack_arraydescr assert len(arglocs) == 0 + length_box = op.getarg(2) + if isinstance(length_box, ConstInt) and length_box.getint() == 0: + return fcond # nothing to do itemsize, baseofs, _ = unpack_arraydescr(op.getdescr()) args = op.getarglist() base_loc = regalloc.rm.make_sure_var_in_reg(args[0], args) @@ -1191,7 +1194,6 @@ else: startindex_loc = regalloc.rm.make_sure_var_in_reg(sibox, args) startindex = -1 - length_box = op.getarg(2) # base_loc and startindex_loc are in two regs here (or they are # immediates). Compute the dstaddr_loc, which is the raw diff --git a/rpython/jit/backend/llsupport/rewrite.py b/rpython/jit/backend/llsupport/rewrite.py --- a/rpython/jit/backend/llsupport/rewrite.py +++ b/rpython/jit/backend/llsupport/rewrite.py @@ -48,7 +48,8 @@ self.known_lengths = {} self.write_barrier_applied = {} self.delayed_zero_setfields = {} - self.delayed_zero_setarrayitems = {} + self.last_zero_arrays = [] + self.setarrayitems_occurred = {} # {box: {set-of-indexes}} def rewrite(self, operations): # we can only remember one malloc since the next malloc can possibly @@ -81,6 +82,7 @@ self.handle_write_barrier_setinteriorfield(op) continue if op.getopnum() == rop.SETARRAYITEM_GC: + self.consider_setarrayitem_gc(op) self.handle_write_barrier_setarrayitem(op) continue else: @@ -89,6 +91,8 @@ # need to clal it if op.getopnum() == rop.SETFIELD_GC: self.consider_setfield_gc(op) + elif op.getopnum() == rop.SETARRAYITEM_GC: + self.consider_setarrayitem_gc(op) # ---------- call assembler ----------- if op.getopnum() == rop.CALL_ASSEMBLER: self.handle_call_assembler(op) @@ -146,6 
+150,16 @@ except KeyError: pass + def consider_setarrayitem_gc(self, op): + array_box = op.getarg(0) + index_box = op.getarg(1) + if isinstance(array_box, BoxPtr) and isinstance(index_box, ConstInt): + try: + intset = self.setarrayitems_occurred[array_box] + except KeyError: + intset = self.setarrayitems_occurred[array_box] = {} + intset[index_box.getint()] = None + def clear_varsize_gc_fields(self, kind, descr, result, v_length, opnum): if self.gc_ll_descr.malloc_zero_filled: return @@ -216,18 +230,18 @@ self.clear_varsize_gc_fields(kind, op.getdescr(), op.result, v_length, op.getopnum()) - def handle_clear_array_contents(self, arraydescr, v_arr, v_length=None): - # XXX more work here to reduce or remove the ZERO_ARRAY in some cases - if v_length is None: - v_length = BoxInt() - o = ResOperation(rop.ARRAYLEN_GC, [v_arr], v_length, - descr=arraydescr) - self.newops.append(o) - elif isinstance(v_length, ConstInt) and v_length.getint() == 0: + def handle_clear_array_contents(self, arraydescr, v_arr, v_length): + assert v_length is not None + if isinstance(v_length, ConstInt) and v_length.getint() == 0: return + # the ZERO_ARRAY operation will be optimized according to what + # SETARRAYITEM_GC we see before the next allocation operation. + # See emit_pending_zeros(). o = ResOperation(rop.ZERO_ARRAY, [v_arr, self.c_zero, v_length], None, descr=arraydescr) self.newops.append(o) + if isinstance(v_length, ConstInt): + self.last_zero_arrays.append(o) def gen_malloc_frame(self, frame_info, frame, size_box): descrs = self.gc_ll_descr.getframedescrs(self.cpu) @@ -317,6 +331,31 @@ self.emit_pending_zeros() def emit_pending_zeros(self): + # First, try to rewrite the existing ZERO_ARRAY operations from + # the 'last_zero_arrays' list. Note that these operation objects + # are also already in 'newops', which is the point. 
+ for op in self.last_zero_arrays: + assert op.getopnum() == rop.ZERO_ARRAY + box = op.getarg(0) + try: + intset = self.setarrayitems_occurred[box] + except KeyError: + continue + assert op.getarg(1).getint() == 0 # always 'start=0' initially + start = 0 + while start in intset: + start += 1 + op.setarg(1, ConstInt(start)) + stop = op.getarg(2).getint() + assert start <= stop + while stop > start and (stop - 1) in intset: + stop -= 1 + op.setarg(2, ConstInt(stop - start)) + # ^^ may be ConstInt(0); then the operation becomes a no-op + del self.last_zero_arrays[:] + self.setarrayitems_occurred.clear() + # + # Then write the ZERO_PTR_FIELDs that are still pending for v, d in self.delayed_zero_setfields.iteritems(): for ofs in d.iterkeys(): op = ResOperation(rop.ZERO_PTR_FIELD, [v, ConstInt(ofs)], None) diff --git a/rpython/jit/backend/llsupport/test/test_rewrite.py b/rpython/jit/backend/llsupport/test/test_rewrite.py --- a/rpython/jit/backend/llsupport/test/test_rewrite.py +++ b/rpython/jit/backend/llsupport/test/test_rewrite.py @@ -743,6 +743,163 @@ jump() """) + def test_zero_array_reduced_left(self): + self.check_rewrite(""" + [p1, p2] + p0 = new_array_clear(5, descr=cdescr) + setarrayitem_gc(p0, 1, p1, descr=cdescr) + setarrayitem_gc(p0, 0, p2, descr=cdescr) + jump() + """, """ + [p1, p2] + p0 = call_malloc_nursery( \ + %(cdescr.basesize + 5 * cdescr.itemsize)d) + setfield_gc(p0, 8111, descr=tiddescr) + setfield_gc(p0, 5, descr=clendescr) + zero_array(p0, 2, 3, descr=cdescr) + setarrayitem_gc(p0, 1, p1, descr=cdescr) + setarrayitem_gc(p0, 0, p2, descr=cdescr) + jump() + """) + + def test_zero_array_reduced_right(self): + self.check_rewrite(""" + [p1, p2] + p0 = new_array_clear(5, descr=cdescr) + setarrayitem_gc(p0, 3, p1, descr=cdescr) + setarrayitem_gc(p0, 4, p2, descr=cdescr) + jump() + """, """ + [p1, p2] + p0 = call_malloc_nursery( \ + %(cdescr.basesize + 5 * cdescr.itemsize)d) + setfield_gc(p0, 8111, descr=tiddescr) + setfield_gc(p0, 5, descr=clendescr) + 
zero_array(p0, 0, 3, descr=cdescr) + setarrayitem_gc(p0, 3, p1, descr=cdescr) + setarrayitem_gc(p0, 4, p2, descr=cdescr) + jump() + """) + + def test_zero_array_not_reduced_at_all(self): + self.check_rewrite(""" + [p1, p2] + p0 = new_array_clear(5, descr=cdescr) + setarrayitem_gc(p0, 3, p1, descr=cdescr) + setarrayitem_gc(p0, 2, p2, descr=cdescr) + setarrayitem_gc(p0, 1, p2, descr=cdescr) + jump() + """, """ + [p1, p2] + p0 = call_malloc_nursery( \ + %(cdescr.basesize + 5 * cdescr.itemsize)d) + setfield_gc(p0, 8111, descr=tiddescr) + setfield_gc(p0, 5, descr=clendescr) + zero_array(p0, 0, 5, descr=cdescr) + setarrayitem_gc(p0, 3, p1, descr=cdescr) + setarrayitem_gc(p0, 2, p2, descr=cdescr) + setarrayitem_gc(p0, 1, p2, descr=cdescr) + jump() + """) + + def test_zero_array_reduced_completely(self): + self.check_rewrite(""" + [p1, p2] + p0 = new_array_clear(5, descr=cdescr) + setarrayitem_gc(p0, 3, p1, descr=cdescr) + setarrayitem_gc(p0, 4, p2, descr=cdescr) + setarrayitem_gc(p0, 0, p1, descr=cdescr) + setarrayitem_gc(p0, 2, p2, descr=cdescr) + setarrayitem_gc(p0, 1, p2, descr=cdescr) + jump() + """, """ + [p1, p2] + p0 = call_malloc_nursery( \ + %(cdescr.basesize + 5 * cdescr.itemsize)d) + setfield_gc(p0, 8111, descr=tiddescr) + setfield_gc(p0, 5, descr=clendescr) + zero_array(p0, 5, 0, descr=cdescr) + setarrayitem_gc(p0, 3, p1, descr=cdescr) + setarrayitem_gc(p0, 4, p2, descr=cdescr) + setarrayitem_gc(p0, 0, p1, descr=cdescr) + setarrayitem_gc(p0, 2, p2, descr=cdescr) + setarrayitem_gc(p0, 1, p2, descr=cdescr) + jump() + """) + + def test_zero_array_reduced_left_with_call(self): + self.check_rewrite(""" + [p1, p2] + p0 = new_array_clear(5, descr=cdescr) + setarrayitem_gc(p0, 0, p1, descr=cdescr) + call(321321) + setarrayitem_gc(p0, 1, p2, descr=cdescr) + jump() + """, """ + [p1, p2] + p0 = call_malloc_nursery( \ + %(cdescr.basesize + 5 * cdescr.itemsize)d) + setfield_gc(p0, 8111, descr=tiddescr) + setfield_gc(p0, 5, descr=clendescr) + zero_array(p0, 1, 4, 
descr=cdescr) + setarrayitem_gc(p0, 0, p1, descr=cdescr) + call(321321) + cond_call_gc_wb(p0, descr=wbdescr) + setarrayitem_gc(p0, 1, p2, descr=cdescr) + jump() + """) + + def test_zero_array_reduced_left_with_label(self): + self.check_rewrite(""" + [p1, p2] + p0 = new_array_clear(5, descr=cdescr) + setarrayitem_gc(p0, 0, p1, descr=cdescr) + label(p0, p2) + setarrayitem_gc(p0, 1, p2, descr=cdescr) + jump() + """, """ + [p1, p2] + p0 = call_malloc_nursery( \ + %(cdescr.basesize + 5 * cdescr.itemsize)d) + setfield_gc(p0, 8111, descr=tiddescr) + setfield_gc(p0, 5, descr=clendescr) + zero_array(p0, 1, 4, descr=cdescr) + setarrayitem_gc(p0, 0, p1, descr=cdescr) + label(p0, p2) + cond_call_gc_wb_array(p0, 1, descr=wbdescr) + setarrayitem_gc(p0, 1, p2, descr=cdescr) + jump() + """) + + def test_zero_array_varsize(self): + self.check_rewrite(""" + [p1, p2, i3] + p0 = new_array_clear(i3, descr=bdescr) + jump() + """, """ + [p1, p2, i3] + p0 = call_malloc_nursery_varsize(0, 1, i3, descr=bdescr) + setfield_gc(p0, i3, descr=blendescr) + zero_array(p0, 0, i3, descr=bdescr) + jump() + """) + + def test_zero_array_varsize_cannot_reduce(self): + self.check_rewrite(""" + [p1, p2, i3] + p0 = new_array_clear(i3, descr=bdescr) + setarrayitem_gc(p0, 0, p1, descr=bdescr) + jump() + """, """ + [p1, p2, i3] + p0 = call_malloc_nursery_varsize(0, 1, i3, descr=bdescr) + setfield_gc(p0, i3, descr=blendescr) + zero_array(p0, 0, i3, descr=bdescr) + cond_call_gc_wb_array(p0, 0, descr=wbdescr) + setarrayitem_gc(p0, 0, p1, descr=bdescr) + jump() + """) + def test_initialization_store_potentially_large_array(self): # the write barrier cannot be omitted, because we might get # an array with cards and the GC assumes that the write diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py --- a/rpython/jit/backend/x86/regalloc.py +++ b/rpython/jit/backend/x86/regalloc.py @@ -1386,14 +1386,16 @@ def consider_zero_array(self, op): itemsize, baseofs, _ = 
unpack_arraydescr(op.getdescr()) + length_box = op.getarg(2) + if isinstance(length_box, ConstInt): + constbytes = length_box.getint() * itemsize + if constbytes == 0: + return # nothing to do + else: + constbytes = -1 args = op.getarglist() base_loc = self.rm.make_sure_var_in_reg(args[0], args) startindex_loc = self.rm.make_sure_var_in_reg(args[1], args) - length_box = op.getarg(2) - if isinstance(length_box, ConstInt): - constbytes = length_box.getint() * itemsize - else: - constbytes = -1 if 0 <= constbytes <= 16 * 8 and ( valid_addressing_size(itemsize) or - isinstance(startindex_loc, ImmedLoc)): _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit