Author: Matti Picus <matti.pi...@gmail.com> Branch: py3.6 Changeset: r96786:d2e310118a69 Date: 2019-06-11 08:37 +0300 http://bitbucket.org/pypy/pypy/changeset/d2e310118a69/
Log: merge default into py3.6 diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -42,3 +42,10 @@ .. branch: optimizeopt-cleanup Cleanup optimizeopt + +.. branch: copystrcontents-in-rewrite + +Remove ``copystrcontent`` and ``copyunicodecontent`` in the backends. +Instead, replace it in ``rewrite.py`` with a direct call to ``memcpy()`` and +new basic operation, ``load_effective_address``, which the backend can +even decide not to implement. diff --git a/rpython/jit/backend/arm/opassembler.py b/rpython/jit/backend/arm/opassembler.py --- a/rpython/jit/backend/arm/opassembler.py +++ b/rpython/jit/backend/arm/opassembler.py @@ -834,73 +834,11 @@ else: assert 0 - #from ../x86/regalloc.py:928 ff. - def emit_op_copystrcontent(self, op, arglocs, regalloc, fcond): - assert len(arglocs) == 0 - self._emit_copystrcontent(op, regalloc, fcond, is_unicode=False) + def emit_op_load_effective_address(self, op, arglocs, regalloc, fcond): + self._gen_address(arglocs[4], arglocs[0], arglocs[1], arglocs[3].value, + arglocs[2].value) return fcond - def emit_op_copyunicodecontent(self, op, arglocs, regalloc, fcond): - assert len(arglocs) == 0 - self._emit_copystrcontent(op, regalloc, fcond, is_unicode=True) - return fcond - - def _emit_copystrcontent(self, op, regalloc, fcond, is_unicode): - # compute the source address - args = op.getarglist() - base_loc = regalloc.rm.make_sure_var_in_reg(args[0], args) - ofs_loc = regalloc.rm.make_sure_var_in_reg(args[2], args) - assert args[0] is not args[1] # forbidden case of aliasing - srcaddr_box = TempVar() - forbidden_vars = [args[1], args[3], args[4], srcaddr_box] - srcaddr_loc = regalloc.rm.force_allocate_reg(srcaddr_box, forbidden_vars) - self._gen_address_inside_string(base_loc, ofs_loc, srcaddr_loc, - is_unicode=is_unicode) - # compute the destination address - base_loc = regalloc.rm.make_sure_var_in_reg(args[1], forbidden_vars) - ofs_loc = regalloc.rm.make_sure_var_in_reg(args[3], forbidden_vars) - forbidden_vars = [args[4], srcaddr_box] - dstaddr_box = TempVar() - dstaddr_loc = regalloc.rm.force_allocate_reg(dstaddr_box, forbidden_vars) - self._gen_address_inside_string(base_loc, ofs_loc, dstaddr_loc, - is_unicode=is_unicode) - # compute the length in bytes - length_box = args[4] - length_loc = regalloc.loc(length_box) - if is_unicode: - forbidden_vars = [srcaddr_box, dstaddr_box] - bytes_box = TempVar() - bytes_loc = regalloc.rm.force_allocate_reg(bytes_box, forbidden_vars) - scale = self._get_unicode_item_scale() - if not length_loc.is_core_reg(): - self.regalloc_mov(length_loc, bytes_loc) - length_loc = bytes_loc - assert length_loc.is_core_reg() - self.mc.MOV_ri(r.ip.value, 1 << scale) - self.mc.MUL(bytes_loc.value, r.ip.value, length_loc.value) - length_box = bytes_box - length_loc = bytes_loc - # call memcpy() - regalloc.before_call() - self.simple_call_no_collect(imm(self.memcpy_addr), - [dstaddr_loc, srcaddr_loc, length_loc]) - regalloc.rm.possibly_free_var(length_box) - regalloc.rm.possibly_free_var(dstaddr_box) - regalloc.rm.possibly_free_var(srcaddr_box) - - def _gen_address_inside_string(self, baseloc, ofsloc, resloc, is_unicode): - if is_unicode: - ofs_items, _, _ = symbolic.get_array_token(rstr.UNICODE, - self.cpu.translate_support_code) - scale = self._get_unicode_item_scale() - else: - ofs_items, itemsize, _ = symbolic.get_array_token(rstr.STR, - self.cpu.translate_support_code) - assert itemsize == 1 - ofs_items -= 1 # for the extra null character - scale = 0 - self._gen_address(resloc, baseloc, ofsloc, scale, ofs_items) - # result = base_loc + (scaled_loc << scale) + static_offset def _gen_address(self, result, base_loc, scaled_loc, scale=0, static_offset=0): assert scaled_loc.is_core_reg() @@ -915,16 +853,6 @@ self.mc.ADD_rr(result.value, base_loc.value, scaled_loc.value) self.mc.ADD_ri(result.value, result.value, static_offset) - def _get_unicode_item_scale(self): - _, itemsize, _ = symbolic.get_array_token(rstr.UNICODE, - self.cpu.translate_support_code) - if itemsize == 4: - return 2 - elif itemsize == 2: - return 1 - else: - raise AssertionError("bad unicode item size") - def store_force_descr(self, op, fail_locs, frame_depth): pos = self.mc.currpos() guard_token = self.build_guard_token(op, frame_depth, fail_locs, pos, c.AL) diff --git a/rpython/jit/backend/arm/regalloc.py b/rpython/jit/backend/arm/regalloc.py --- a/rpython/jit/backend/arm/regalloc.py +++ b/rpython/jit/backend/arm/regalloc.py @@ -873,8 +873,6 @@ prepare_op_gc_load_indexed_r = _prepare_op_gc_load_indexed prepare_op_gc_load_indexed_f = _prepare_op_gc_load_indexed - prepare_op_copystrcontent = void - prepare_op_copyunicodecontent = void prepare_op_zero_array = void def _prepare_op_same_as(self, op, fcond): @@ -899,6 +897,13 @@ resloc = self.force_allocate_reg(op) return [resloc] + def prepare_op_load_effective_address(self, op, fcond): + args = op.getarglist() + arg0 = self.make_sure_var_in_reg(args[0], args) + arg1 = self.make_sure_var_in_reg(args[1], args) + res = self.force_allocate_reg(op) + return [arg0, arg1, args[2], args[3], res] + def prepare_op_call_malloc_nursery(self, op, fcond): size_box = op.getarg(0) assert isinstance(size_box, ConstInt) diff --git a/rpython/jit/backend/arm/runner.py b/rpython/jit/backend/arm/runner.py --- a/rpython/jit/backend/arm/runner.py +++ b/rpython/jit/backend/arm/runner.py @@ -23,6 +23,7 @@ supports_floats = True supports_longlong = True supports_singlefloats = True + supports_load_effective_address = True from rpython.jit.backend.arm.arch import JITFRAME_FIXED_SIZE all_reg_indexes = range(len(all_regs)) diff --git a/rpython/jit/backend/llsupport/gc.py b/rpython/jit/backend/llsupport/gc.py --- a/rpython/jit/backend/llsupport/gc.py +++ b/rpython/jit/backend/llsupport/gc.py @@ -14,6 +14,7 @@ from rpython.jit.metainterp.support import ptr2int from rpython.jit.backend.llsupport import symbolic, jitframe from rpython.jit.backend.llsupport.symbolic import WORD +from rpython.jit.backend.llsupport.memcpy import memcpy_fn from rpython.jit.backend.llsupport.descr import SizeDescr, ArrayDescr, FieldDescr from rpython.jit.backend.llsupport.descr import GcCache, get_field_descr from rpython.jit.backend.llsupport.descr import get_array_descr @@ -36,6 +37,11 @@ self.fielddescr_vtable = get_field_descr(self, rclass.OBJECT, 'typeptr') self._generated_functions = [] + self.memcpy_fn = memcpy_fn + self.memcpy_descr = get_call_descr(self, + [lltype.Signed, lltype.Signed, lltype.Signed], lltype.Void, + EffectInfo([], [], [], [], [], [], EffectInfo.EF_CANNOT_RAISE, + can_collect=False)) def _setup_str(self): self.str_descr = get_array_descr(self, rstr.STR) diff --git a/rpython/jit/backend/llsupport/rewrite.py b/rpython/jit/backend/llsupport/rewrite.py --- a/rpython/jit/backend/llsupport/rewrite.py +++ b/rpython/jit/backend/llsupport/rewrite.py @@ -34,6 +34,10 @@ - Add COND_CALLs to the write barrier before SETFIELD_GC and SETARRAYITEM_GC operations. + - Rewrites copystrcontent to a call to memcopy + + - XXX does more than that, please write it down + '_write_barrier_applied' contains a dictionary of variable -> None. If a variable is in the dictionary, next setfields can be called without a write barrier. The idea is that an object that was freshly allocated @@ -335,6 +339,10 @@ self.emitting_an_operation_that_can_collect() elif op.getopnum() == rop.LABEL: self.emit_label() + # ---- change COPY{STR|UNICODE}CONTENT into a call ------ + if op.opnum == rop.COPYSTRCONTENT or op.opnum == rop.COPYUNICODECONTENT: + self.rewrite_copy_str_content(op) + continue # ---------- write barriers ---------- if self.gc_ll_descr.write_barrier_descr is not None: if op.getopnum() == rop.SETFIELD_GC: @@ -953,6 +961,61 @@ self.gcrefs_output_list.append(gcref) return index + def rewrite_copy_str_content(self, op): + funcaddr = llmemory.cast_ptr_to_adr(self.gc_ll_descr.memcpy_fn) + memcpy_fn = self.cpu.cast_adr_to_int(funcaddr) + memcpy_descr = self.gc_ll_descr.memcpy_descr + if op.getopnum() == rop.COPYSTRCONTENT: + basesize = self.gc_ll_descr.str_descr.basesize + # because we have one extra item after alloc, the actual address + # of string start is 1 lower, from extra_item_after_malloc + basesize -= 1 + assert self.gc_ll_descr.str_descr.itemsize == 1 + itemscale = 0 + else: + basesize = self.gc_ll_descr.unicode_descr.basesize + itemsize = self.gc_ll_descr.unicode_descr.itemsize + if itemsize == 2: + itemscale = 1 + elif itemsize == 4: + itemscale = 2 + else: + assert False, "unknown size of unicode" + i1 = self.emit_load_effective_address(op.getarg(0), op.getarg(2), + basesize, itemscale) + i2 = self.emit_load_effective_address(op.getarg(1), op.getarg(3), + basesize, itemscale) + if op.getopnum() == rop.COPYSTRCONTENT: + arg = op.getarg(4) + else: + # do some basic constant folding + if isinstance(op.getarg(4), ConstInt): + arg = ConstInt(op.getarg(4).getint() << itemscale) + else: + arg = ResOperation(rop.INT_LSHIFT, + [op.getarg(4), ConstInt(itemscale)]) + self.emit_op(arg) + self.emit_op(ResOperation(rop.CALL_N, + [ConstInt(memcpy_fn), i2, i1, arg], descr=memcpy_descr)) + + def emit_load_effective_address(self, v_gcptr, v_index, base, itemscale): + if self.cpu.supports_load_effective_address: + i1 = ResOperation(rop.LOAD_EFFECTIVE_ADDRESS, + [v_gcptr, v_index, ConstInt(base), + ConstInt(itemscale)]) + self.emit_op(i1) + return i1 + else: + if itemscale > 0: + v_index = ResOperation(rop.INT_LSHIFT, + [v_index, ConstInt(itemscale)]) + self.emit_op(v_index) + i1b = ResOperation(rop.INT_ADD, [v_gcptr, v_index]) + self.emit_op(i1b) + i1 = ResOperation(rop.INT_ADD, [i1b, ConstInt(base)]) + self.emit_op(i1) + return i1 + def remove_constptr(self, c): """Remove all ConstPtrs, and replace them with load_from_gc_table. """ diff --git a/rpython/jit/backend/llsupport/test/test_rewrite.py b/rpython/jit/backend/llsupport/test/test_rewrite.py --- a/rpython/jit/backend/llsupport/test/test_rewrite.py +++ b/rpython/jit/backend/llsupport/test/test_rewrite.py @@ -142,11 +142,16 @@ raw_sfdescr = get_array_descr(self.gc_ll_descr, RAW_SF) # strdescr = self.gc_ll_descr.str_descr + str_basesize = self.gc_ll_descr.str_descr.basesize - 1 unicodedescr = self.gc_ll_descr.unicode_descr strlendescr = strdescr.lendescr unicodelendescr = unicodedescr.lendescr strhashdescr = self.gc_ll_descr.str_hash_descr unicodehashdescr = self.gc_ll_descr.unicode_hash_descr + uni_basesize = unicodedescr.basesize + uni_itemscale = {2: 1, 4: 2}[unicodedescr.itemsize] + memcpy_fn = self.gc_ll_descr.memcpy_fn + memcpy_descr = self.gc_ll_descr.memcpy_descr casmdescr = JitCellToken() clt = FakeLoopToken() @@ -169,6 +174,7 @@ signedframedescr = self.cpu.signedframedescr floatframedescr = self.cpu.floatframedescr casmdescr.compiled_loop_token = clt + # guarddescr = AbstractFailDescr() # @@ -200,6 +206,7 @@ load_constant_offset = True load_supported_factors = (1,2,4,8) + supports_load_effective_address = True translate_support_code = None @@ -237,6 +244,9 @@ self._cache[key] = r return r + def cast_adr_to_int(self, adr): + return llmemory.AddressAsInt(adr) + class TestBoehm(RewriteTests): def setup_method(self, meth): class FakeCPU(BaseFakeCPU): @@ -1436,3 +1446,57 @@ jump() """) assert len(self.gcrefs) == 2 + + def test_rewrite_copystrcontents(self): + self.check_rewrite(""" + [p0, p1, i0, i1, i_len] + copystrcontent(p0, p1, i0, i1, i_len) + """, """ + [p0, p1, i0, i1, i_len] + i2 = load_effective_address(p0, i0, %(str_basesize)s, 0) + i3 = load_effective_address(p1, i1, %(str_basesize)s, 0) + call_n(ConstClass(memcpy_fn), i3, i2, i_len, descr=memcpy_descr) + """) + + def test_rewrite_copystrcontents_without_load_effective_address(self): + self.cpu.supports_load_effective_address = False + self.check_rewrite(""" + [p0, p1, i0, i1, i_len] + copystrcontent(p0, p1, i0, i1, i_len) + """, """ + [p0, p1, i0, i1, i_len] + i2b = int_add(p0, i0) + i2 = int_add(i2b, %(str_basesize)s) + i3b = int_add(p1, i1) + i3 = int_add(i3b, %(str_basesize)s) + call_n(ConstClass(memcpy_fn), i3, i2, i_len, descr=memcpy_descr) + """) + + def test_rewrite_copyunicodecontents(self): + self.check_rewrite(""" + [p0, p1, i0, i1, i_len] + copyunicodecontent(p0, p1, i0, i1, i_len) + """, """ + [p0, p1, i0, i1, i_len] + i2 = load_effective_address(p0, i0, %(uni_basesize)s, %(uni_itemscale)d) + i3 = load_effective_address(p1, i1, %(uni_basesize)s, %(uni_itemscale)d) + i4 = int_lshift(i_len, %(uni_itemscale)d) + call_n(ConstClass(memcpy_fn), i3, i2, i4, descr=memcpy_descr) + """) + + def test_rewrite_copyunicodecontents_without_load_effective_address(self): + self.cpu.supports_load_effective_address = False + self.check_rewrite(""" + [p0, p1, i0, i1, i_len] + copyunicodecontent(p0, p1, i0, i1, i_len) + """, """ + [p0, p1, i0, i1, i_len] + i0s = int_lshift(i0, %(uni_itemscale)d) + i2b = int_add(p0, i0s) + i2 = int_add(i2b, %(uni_basesize)s) + i1s = int_lshift(i1, %(uni_itemscale)d) + i3b = int_add(p1, i1s) + i3 = int_add(i3b, %(uni_basesize)s) + i4 = int_lshift(i_len, %(uni_itemscale)d) + call_n(ConstClass(memcpy_fn), i3, i2, i4, descr=memcpy_descr) + """) diff --git a/rpython/jit/backend/model.py b/rpython/jit/backend/model.py --- a/rpython/jit/backend/model.py +++ b/rpython/jit/backend/model.py @@ -19,6 +19,7 @@ # Boxes and Consts are BoxFloats and ConstFloats. supports_singlefloats = False supports_guard_gc_type = False + supports_load_effective_address = False propagate_exception_descr = None diff --git a/rpython/jit/backend/ppc/opassembler.py b/rpython/jit/backend/ppc/opassembler.py --- a/rpython/jit/backend/ppc/opassembler.py +++ b/rpython/jit/backend/ppc/opassembler.py @@ -966,72 +966,6 @@ pmc.overwrite() -class StrOpAssembler(object): - - _mixin_ = True - - def emit_copystrcontent(self, op, arglocs, regalloc): - self._emit_copycontent(arglocs, is_unicode=False) - - def emit_copyunicodecontent(self, op, arglocs, regalloc): - self._emit_copycontent(arglocs, is_unicode=True) - - def _emit_load_for_copycontent(self, dst, src_ptr, src_ofs, scale): - if src_ofs.is_imm(): - value = src_ofs.value << scale - if value < 32768: - self.mc.addi(dst.value, src_ptr.value, value) - else: - self.mc.load_imm(dst, value) - self.mc.add(dst.value, src_ptr.value, dst.value) - elif scale == 0: - self.mc.add(dst.value, src_ptr.value, src_ofs.value) - else: - self.mc.sldi(dst.value, src_ofs.value, scale) - self.mc.add(dst.value, src_ptr.value, dst.value) - - def _emit_copycontent(self, arglocs, is_unicode): - [src_ptr_loc, dst_ptr_loc, - src_ofs_loc, dst_ofs_loc, length_loc] = arglocs - - if is_unicode: - basesize, itemsize, _ = symbolic.get_array_token(rstr.UNICODE, - self.cpu.translate_support_code) - if itemsize == 2: scale = 1 - elif itemsize == 4: scale = 2 - else: raise AssertionError - else: - basesize, itemsize, _ = symbolic.get_array_token(rstr.STR, - self.cpu.translate_support_code) - assert itemsize == 1 - basesize -= 1 # for the extra null character - scale = 0 - - self._emit_load_for_copycontent(r.r0, src_ptr_loc, src_ofs_loc, scale) - self._emit_load_for_copycontent(r.r2, dst_ptr_loc, dst_ofs_loc, scale) - - if length_loc.is_imm(): - length = length_loc.getint() - self.mc.load_imm(r.r5, length << scale) - else: - if scale > 0: - self.mc.sldi(r.r5.value, length_loc.value, scale) - elif length_loc is not r.r5: - self.mc.mr(r.r5.value, length_loc.value) - - self.mc.mr(r.r4.value, r.r0.value) - self.mc.addi(r.r4.value, r.r4.value, basesize) - self.mc.addi(r.r3.value, r.r2.value, basesize) - - self.mc.load_imm(self.mc.RAW_CALL_REG, self.memcpy_addr) - self.mc.raw_call() - - -class UnicodeOpAssembler(object): - _mixin_ = True - # empty! - - class AllocOpAssembler(object): _mixin_ = True @@ -1336,8 +1270,7 @@ class OpAssembler(IntOpAssembler, GuardOpAssembler, MiscOpAssembler, FieldOpAssembler, - StrOpAssembler, CallOpAssembler, - UnicodeOpAssembler, ForceOpAssembler, + CallOpAssembler, ForceOpAssembler, AllocOpAssembler, FloatOpAssembler, VectorAssembler): _mixin_ = True diff --git a/rpython/jit/backend/ppc/regalloc.py b/rpython/jit/backend/ppc/regalloc.py --- a/rpython/jit/backend/ppc/regalloc.py +++ b/rpython/jit/backend/ppc/regalloc.py @@ -802,18 +802,6 @@ temp_loc = r.SCRATCH2 return [base_loc, temp_loc] - def prepare_copystrcontent(self, op): - src_ptr_loc = self.ensure_reg(op.getarg(0)) - dst_ptr_loc = self.ensure_reg(op.getarg(1)) - src_ofs_loc = self.ensure_reg_or_any_imm(op.getarg(2)) - dst_ofs_loc = self.ensure_reg_or_any_imm(op.getarg(3)) - length_loc = self.ensure_reg_or_any_imm(op.getarg(4)) - self._spill_before_call(gc_level=0) - return [src_ptr_loc, dst_ptr_loc, - src_ofs_loc, dst_ofs_loc, length_loc] - - prepare_copyunicodecontent = prepare_copystrcontent - prepare_same_as_i = helper.prepare_unary_op prepare_same_as_r = helper.prepare_unary_op prepare_same_as_f = helper.prepare_unary_op diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py --- a/rpython/jit/backend/x86/regalloc.py +++ b/rpython/jit/backend/x86/regalloc.py @@ -1222,78 +1222,16 @@ resloc = self.force_allocate_reg(op, [op.getarg(0)]) self.perform(op, [argloc], resloc) - def consider_copystrcontent(self, op): - self._consider_copystrcontent(op, is_unicode=False) - - def consider_copyunicodecontent(self, op): - self._consider_copystrcontent(op, is_unicode=True) - - def _consider_copystrcontent(self, op, is_unicode): - # compute the source address - args = op.getarglist() - base_loc = self.rm.make_sure_var_in_reg(args[0], args) - ofs_loc = self.rm.make_sure_var_in_reg(args[2], args) - assert args[0] is not args[1] # forbidden case of aliasing - srcaddr_box = TempVar() - forbidden_vars = [args[1], args[3], args[4], srcaddr_box] - srcaddr_loc = self.rm.force_allocate_reg(srcaddr_box, forbidden_vars) - self._gen_address_inside_string(base_loc, ofs_loc, srcaddr_loc, - is_unicode=is_unicode) - # compute the destination address - base_loc = self.rm.make_sure_var_in_reg(args[1], forbidden_vars) - ofs_loc = self.rm.make_sure_var_in_reg(args[3], forbidden_vars) - forbidden_vars = [args[4], srcaddr_box] - dstaddr_box = TempVar() - dstaddr_loc = self.rm.force_allocate_reg(dstaddr_box, forbidden_vars) - self._gen_address_inside_string(base_loc, ofs_loc, dstaddr_loc, - is_unicode=is_unicode) - # compute the length in bytes - length_box = args[4] - length_loc = self.loc(length_box) - if is_unicode: - forbidden_vars = [srcaddr_box, dstaddr_box] - bytes_box = TempVar() - bytes_loc = self.rm.force_allocate_reg(bytes_box, forbidden_vars) - scale = self._get_unicode_item_scale() - if not (isinstance(length_loc, ImmedLoc) or - isinstance(length_loc, RegLoc)): - self.assembler.mov(length_loc, bytes_loc) - length_loc = bytes_loc - self.assembler.load_effective_addr(length_loc, 0, scale, bytes_loc) - length_box = bytes_box - length_loc = bytes_loc - # call memcpy() - self.rm.before_call() - self.xrm.before_call() - self.assembler.simple_call_no_collect(imm(self.assembler.memcpy_addr), - [dstaddr_loc, srcaddr_loc, length_loc]) - self.rm.possibly_free_var(length_box) - self.rm.possibly_free_var(dstaddr_box) - self.rm.possibly_free_var(srcaddr_box) - - def _gen_address_inside_string(self, baseloc, ofsloc, resloc, is_unicode): - if is_unicode: - ofs_items, _, _ = symbolic.get_array_token(rstr.UNICODE, - self.translate_support_code) - scale = self._get_unicode_item_scale() - else: - ofs_items, itemsize, _ = symbolic.get_array_token(rstr.STR, - self.translate_support_code) - assert itemsize == 1 - ofs_items -= 1 # for the extra null character - scale = 0 - self.assembler.load_effective_addr(ofsloc, ofs_items, scale, - resloc, baseloc) - - def _get_unicode_item_scale(self): - _, itemsize, _ = symbolic.get_array_token(rstr.UNICODE, - self.translate_support_code) - if itemsize == 4: - return 2 - elif itemsize == 2: - return 1 - else: - raise AssertionError("bad unicode item size") + def consider_load_effective_address(self, op): + p0 = op.getarg(0) + i0 = op.getarg(1) + ploc = self.make_sure_var_in_reg(p0, [i0]) + iloc = self.make_sure_var_in_reg(i0, [p0]) + res = self.rm.force_allocate_reg(op, [p0, i0]) + assert isinstance(op.getarg(2), ConstInt) + assert isinstance(op.getarg(3), ConstInt) + self.assembler.load_effective_addr(iloc, op.getarg(2).getint(), + op.getarg(3).getint(), res, ploc) def _consider_math_read_timestamp(self, op): # hint: try to move unrelated registers away from eax and edx now diff --git a/rpython/jit/backend/x86/runner.py b/rpython/jit/backend/x86/runner.py --- a/rpython/jit/backend/x86/runner.py +++ b/rpython/jit/backend/x86/runner.py @@ -16,6 +16,7 @@ debug = True supports_floats = True supports_singlefloats = True + supports_load_effective_address = True dont_keepalive_stuff = False # for tests with_threads = False diff --git a/rpython/jit/backend/zarch/opassembler.py b/rpython/jit/backend/zarch/opassembler.py --- a/rpython/jit/backend/zarch/opassembler.py +++ b/rpython/jit/backend/zarch/opassembler.py @@ -963,75 +963,15 @@ def _mem_offset_supported(self, value): return -2**19 <= value < 2**19 - def emit_copystrcontent(self, op, arglocs, regalloc): - self._emit_copycontent(arglocs, is_unicode=False) - - def emit_copyunicodecontent(self, op, arglocs, regalloc): - self._emit_copycontent(arglocs, is_unicode=True) - - def _emit_load_for_copycontent(self, dst, src_ptr, src_ofs, scale): - if src_ofs.is_imm(): - value = src_ofs.value << scale - if check_imm_value(value): - self.mc.AGHIK(dst, src_ptr, l.imm(value)) - else: - # it is fine to use r1 here, because it will - # only hold a value before invoking the memory copy - self.mc.load_imm(r.SCRATCH, value) - self.mc.AGRK(dst, src_ptr, r.SCRATCH) - elif scale == 0: - self.mc.AGRK(dst, src_ptr, src_ofs) - else: - self.mc.SLLG(r.SCRATCH, src_ofs, l.addr(scale)) - self.mc.AGRK(dst, src_ptr, r.SCRATCH) - - def _emit_copycontent(self, arglocs, is_unicode): - [src_ptr_loc, dst_ptr_loc, - src_ofs_loc, dst_ofs_loc, length_loc] = arglocs - - if is_unicode: - basesize, itemsize, _ = symbolic.get_array_token(rstr.UNICODE, - self.cpu.translate_support_code) - if itemsize == 2: scale = 1 - elif itemsize == 4: scale = 2 - else: raise AssertionError - else: - basesize, itemsize, _ = symbolic.get_array_token(rstr.STR, - self.cpu.translate_support_code) - assert itemsize == 1 - basesize -= 1 # for the extra null character - scale = 0 - - # src and src_len are tmp registers - src = src_ptr_loc - src_len = r.odd_reg(src) - dst = r.r0 - dst_len = r.r1 - self._emit_load_for_copycontent(src, src_ptr_loc, src_ofs_loc, scale) - self._emit_load_for_copycontent(dst, dst_ptr_loc, dst_ofs_loc, scale) - - if length_loc.is_imm(): - length = length_loc.getint() - self.mc.load_imm(dst_len, length << scale) - else: - if scale > 0: - self.mc.SLLG(dst_len, length_loc, l.addr(scale)) - else: - self.mc.LGR(dst_len, length_loc) - # ensure that src_len is as long as dst_len, otherwise - # padding bytes are written to dst - self.mc.LGR(src_len, dst_len) - - self.mc.AGHI(src, l.imm(basesize)) - self.mc.AGHI(dst, l.imm(basesize)) - - # s390x has memset directly as a hardware instruction!! - # 0xB8 means we might reference dst later - self.mc.MVCLE(dst, src, l.addr(0xB8)) - # NOTE this instruction can (determined by the cpu), just - # quit the movement any time, thus it is looped until all bytes - # are copied! - self.mc.BRC(c.OF, l.imm(-self.mc.MVCLE_byte_count)) + # ...copystrcontent logic was removed, but note that + # if we want to reintroduce support for that: + # s390x has memset directly as a hardware instruction!! + # 0xB8 means we might reference dst later + #self.mc.MVCLE(dst, src, l.addr(0xB8)) + # NOTE this instruction can (determined by the cpu), just + # quit the movement any time, thus it is looped until all bytes + # are copied! + #self.mc.BRC(c.OF, l.imm(-self.mc.MVCLE_byte_count)) def emit_zero_array(self, op, arglocs, regalloc): base_loc, startindex_loc, length_loc, \ diff --git a/rpython/jit/backend/zarch/regalloc.py b/rpython/jit/backend/zarch/regalloc.py --- a/rpython/jit/backend/zarch/regalloc.py +++ b/rpython/jit/backend/zarch/regalloc.py @@ -1269,29 +1269,6 @@ loc1 = self.ensure_reg(op.getarg(1)) return [loc0, loc1] - def prepare_copystrcontent(self, op): - """ this function needs five registers. - src & src_len: are allocated using ensure_even_odd_pair. - note that these are tmp registers, thus the actual variable - value is not modified. - src_len: when entering the assembler, src_ofs_loc's value is contained - in src_len register. - """ - src_ptr_loc, _ = \ - self.rm.ensure_even_odd_pair(op.getarg(0), - None, bind_first=True, - must_exist=False, load_loc_odd=False) - src_ofs_loc = self.ensure_reg_or_any_imm(op.getarg(2)) - dst_ptr_loc = self.ensure_reg(op.getarg(1)) - dst_ofs_loc = self.ensure_reg_or_any_imm(op.getarg(3)) - length_loc = self.ensure_reg_or_any_imm(op.getarg(4)) - # no need to spill, we do not call memcpy, but we use s390x's - # hardware instruction to copy memory - return [src_ptr_loc, dst_ptr_loc, - src_ofs_loc, dst_ofs_loc, length_loc] - - prepare_copyunicodecontent = prepare_copystrcontent - def prepare_label(self, op): descr = op.getdescr() assert isinstance(descr, TargetToken) diff --git a/rpython/jit/metainterp/executor.py b/rpython/jit/metainterp/executor.py --- a/rpython/jit/metainterp/executor.py +++ b/rpython/jit/metainterp/executor.py @@ -441,6 +441,7 @@ rop.GC_STORE, rop.GC_STORE_INDEXED, rop.LOAD_FROM_GC_TABLE, + rop.LOAD_EFFECTIVE_ADDRESS, ): # list of opcodes never executed by pyjitpl continue if rop._VEC_PURE_FIRST <= value <= rop._VEC_PURE_LAST: diff --git a/rpython/jit/metainterp/optimizeopt/heap.py b/rpython/jit/metainterp/optimizeopt/heap.py --- a/rpython/jit/metainterp/optimizeopt/heap.py +++ b/rpython/jit/metainterp/optimizeopt/heap.py @@ -629,6 +629,20 @@ # and then emit the operation return self.emit(op) + def optimize_GC_LOAD_I(self, op): + # seeing a 'gc_load*' forces all the lazy sets that are still + # pending, as an approximation. We could try to be really clever + # and only force some of them, but we don't have any descr here. + self.force_all_lazy_sets() + self.make_nonnull(op.getarg(0)) + return self.emit(op) + optimize_GC_LOAD_R = optimize_GC_LOAD_I + optimize_GC_LOAD_F = optimize_GC_LOAD_I + + optimize_GC_LOAD_INDEXED_I = optimize_GC_LOAD_I + optimize_GC_LOAD_INDEXED_R = optimize_GC_LOAD_I + optimize_GC_LOAD_INDEXED_F = optimize_GC_LOAD_I + def optimize_QUASIIMMUT_FIELD(self, op): # Pattern: QUASIIMMUT_FIELD(s, descr=QuasiImmutDescr) # x = GETFIELD_GC(s, descr='inst_x') # pure diff --git a/rpython/jit/metainterp/optimizeopt/test/test_optimizeopt.py b/rpython/jit/metainterp/optimizeopt/test/test_optimizeopt.py --- a/rpython/jit/metainterp/optimizeopt/test/test_optimizeopt.py +++ b/rpython/jit/metainterp/optimizeopt/test/test_optimizeopt.py @@ -9495,3 +9495,42 @@ jump(i3, i2, i3) """ self.optimize_loop(ops, expected) + + def test_issue3014(self): + # 'gc_load_indexed' must force 'setarrayitem_gc' + ops = """ + [i183] + p0 = new_array(5, descr=arraydescr) + setarrayitem_gc(p0, 0, i183, descr=arraydescr) + i235 = gc_load_indexed_i(p0, 0, 1, 16, 2) + escape_i(i235) + jump(i183) + """ + self.optimize_loop(ops, ops) + + def test_issue3014_2(self): + # same rules for gc_store_indexed versus getarrayitem_gc, + # and 'gc_store_indexed' invalidates the value for 'getarrayitem_gc' + # (in this direction it seems to work already) + ops = """ + [p0, i183] + i234 = getarrayitem_gc_i(p0, 0, descr=arraydescr) + gc_store_indexed(p0, 0, i183, 1, 16, 2) + i235 = getarrayitem_gc_i(p0, 0, descr=arraydescr) + escape_i(i234) + escape_i(i235) + jump(p0, i183) + """ + self.optimize_loop(ops, ops) + + def test_issue3014_3(self): + # 'gc_load' must force 'setfield_gc' + ops = """ + [i183] + p0 = new(descr=ssize) + setfield_gc(p0, i183, descr=adescr) + i235 = gc_load_i(p0, 8, 2) + escape_i(i235) + jump(i183) + """ + self.optimize_loop(ops, ops) diff --git a/rpython/jit/metainterp/resoperation.py b/rpython/jit/metainterp/resoperation.py --- a/rpython/jit/metainterp/resoperation.py +++ b/rpython/jit/metainterp/resoperation.py @@ -1055,6 +1055,8 @@ 'UNICODEGETITEM/2/i', # 'LOAD_FROM_GC_TABLE/1/r', # only emitted by rewrite.py + 'LOAD_EFFECTIVE_ADDRESS/4/i', # only emitted by rewrite.py, only if + # cpu.supports_load_effective_address. [v_gcptr,v_index,c_baseofs,c_shift] # '_ALWAYS_PURE_LAST', # ----- end of always_pure operations ----- diff --git a/rpython/rtyper/test/test_rfloat.py b/rpython/rtyper/test/test_rfloat.py --- a/rpython/rtyper/test/test_rfloat.py +++ b/rpython/rtyper/test/test_rfloat.py @@ -253,3 +253,10 @@ return compute_hash(f) res = self.interpret(fn, [1.5]) assert res == compute_hash(1.5) + + def test_float_constant_inf(self): + from rpython.rlib.rfloat import INFINITY + def fn(): + return INFINITY # float('inf') is not supported by RPython so far + res = self.interpret(fn, []) + assert res == float('inf') _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit