Author: Armin Rigo <ar...@tunes.org>
Branch: optresult-unroll
Changeset: r79480:9d419227611e
Date: 2015-09-06 16:49 +0200
http://bitbucket.org/pypy/pypy/changeset/9d419227611e/

Log: in-progress diff --git a/rpython/jit/backend/arm/assembler.py b/rpython/jit/backend/arm/assembler.py --- a/rpython/jit/backend/arm/assembler.py +++ b/rpython/jit/backend/arm/assembler.py @@ -708,7 +708,7 @@ self.fixup_target_tokens(rawstart) self.update_frame_depth(frame_depth) if logger: - logger.log_bridge(inputargs, operations, "rewritten", + logger.log_bridge(inputargs, operations, "rewritten", faildescr, ops_offset=ops_offset) self.teardown() @@ -935,9 +935,9 @@ op = operations[i] self.mc.mark_op(op) opnum = op.getopnum() - if op.has_no_side_effect() and op.result not in regalloc.longevity: + if op.has_no_side_effect() and op not in regalloc.longevity: regalloc.possibly_free_vars_for_op(op) - elif not we_are_translated() and op.getopnum() == -124: + elif not we_are_translated() and op.getopnum() == -127: regalloc.prepare_force_spill(op, fcond) else: arglocs = regalloc_operations[opnum](regalloc, op, fcond) @@ -947,7 +947,7 @@ assert fcond is not None if op.is_guard(): regalloc.possibly_free_vars(op.getfailargs()) - if op.result: + if op.type != 'v': regalloc.possibly_free_var(op.result) regalloc.possibly_free_vars_for_op(op) regalloc.free_temp_vars() diff --git a/rpython/jit/backend/arm/opassembler.py b/rpython/jit/backend/arm/opassembler.py --- a/rpython/jit/backend/arm/opassembler.py +++ b/rpython/jit/backend/arm/opassembler.py @@ -49,6 +49,8 @@ def emit_op_int_add(self, op, arglocs, regalloc, fcond): return self.int_add_impl(op, arglocs, regalloc, fcond) + emit_op_nursery_ptr_increment = emit_op_int_add + def int_add_impl(self, op, arglocs, regalloc, fcond, flags=False): l0, l1, res = arglocs if flags: @@ -253,28 +255,102 @@ def emit_op_guard_class(self, op, arglocs, regalloc, fcond): self._cmp_guard_class(op, arglocs, regalloc, fcond) self.guard_success_cc = c.EQ - self._emit_guard(op, arglocs[3:], save_exc=False) + self._emit_guard(op, arglocs[2:], save_exc=False) return fcond def emit_op_guard_nonnull_class(self, op, arglocs, regalloc, fcond): 
self.mc.CMP_ri(arglocs[0].value, 1) self._cmp_guard_class(op, arglocs, regalloc, c.HS) self.guard_success_cc = c.EQ - self._emit_guard(op, arglocs[3:], save_exc=False) + self._emit_guard(op, arglocs[2:], save_exc=False) return fcond def _cmp_guard_class(self, op, locs, regalloc, fcond): - offset = locs[2] + offset = self.cpu.vtable_offset if offset is not None: - self.mc.LDR_ri(r.ip.value, locs[0].value, offset.value, cond=fcond) + self.mc.LDR_ri(r.ip.value, locs[0].value, offset, cond=fcond) self.mc.CMP_rr(r.ip.value, locs[1].value, cond=fcond) else: typeid = locs[1] - self.mc.LDRH_ri(r.ip.value, locs[0].value, cond=fcond) - if typeid.is_imm(): - self.mc.CMP_ri(r.ip.value, typeid.value, cond=fcond) - else: - self.mc.CMP_rr(r.ip.value, typeid.value, cond=fcond) + assert typeid.is_imm() + expected_typeid = (self.cpu.gc_ll_descr + .get_typeid_from_classptr_if_gcremovetypeptr(typeid.value)) + self._cmp_guard_gc_type(locs[0], expected_typeid, fcond) + + def _cmp_guard_gc_type(self, loc_ptr, expected_typeid, fcond=c.AL): + # Note that the typeid half-word is at offset 0 on a little-endian + # machine; it would be at offset 2 or 4 on a big-endian machine. + assert self.cpu.supports_guard_gc_type + assert 0 <= expected_typeid <= 0xFFFF + self.mc.LDRH_ri(r.ip.value, loc_ptr.value, 0, + cond=fcond) + xxxxxx #ENCODING NOT SUPPORTED HERE? 
+ self.mc.SUB_ri(r.ip.value, r.ip.value, expected_typeid & 0xFF00, + cond=fcond) + self.mc.CMP_ri(r.ip.value, expected_typeid & 0xFF, + cond=fcond) + + def emit_op_guard_gc_type(self, op, arglocs, regalloc, fcond): + self._cmp_guard_gc_type(arglocs[0], arglocs[1].value) + self.guard_success_cc = c.EQ + self._emit_guard(op, arglocs[2:], save_exc=False) + return fcond + + def emit_op_guard_is_object(self, op, arglocs, regalloc, fcond): + assert self.cpu.supports_guard_gc_type + loc_object = arglocs[0] + loc_base_type_info = arglocs[1] + # idea: read the typeid, fetch one byte of the field 'infobits' from + # the big typeinfo table, and check the flag 'T_IS_RPYTHON_INSTANCE'. + self.mc.LDRH_ri(r.ip.value, loc_object.value) + # + base_type_info, shift_by, sizeof_ti = ( + self.cpu.gc_ll_descr.get_translated_info_for_typeinfo()) + infobits_offset, IS_OBJECT_FLAG = ( + self.cpu.gc_ll_descr.get_translated_info_for_guard_is_object()) + + if shift_by > 0: + self.mc.LSL_ri(r.ip.value, r.ip.value, shift_by) + self.mc.LDRB_ri(r.ip.value, loc_base_type_info, r.ip.value) + self.mc.TST_ri(r.ip.value, imm=IS_OBJECT_FLAG) + self.guard_success_cc = c.NE + self._emit_guard(op, arglocs[2:], save_exc=False) + return fcond + + def emit_op_guard_subclass(self, op, arglocs, regalloc, fcond): + assert self.cpu.supports_guard_gc_type + loc_object = arglocs[0] + loc_check_against_class = arglocs[1] + loc_ofs_subclassrange_min = arglocs[2] + offset = self.cpu.vtable_offset + offset2 = self.cpu.subclassrange_min_offset + if offset is not None: + # read this field to get the vtable pointer + self.mc.LDR_ri(r.ip.value, loc_object.value, imm=offset) + # read the vtable's subclassrange_min field + self.mc.LDR_ri(r.ip.value, r.ip.value, imm=offset2) + else: + # read the typeid + self.mc.LDRH_ri(r.ip.value, loc_object.value) + # read the vtable's subclassrange_min field, as a single + # step with the correct offset + base_type_info, shift_by, sizeof_ti = ( + 
self.cpu.gc_ll_descr.get_translated_info_for_typeinfo()) + if shift_by > 0: + self.mc.LSL_ri(r.ip.value, r.ip.value, shift_by) + self.mc.LDR_ri(r.ip.value, loc_ofs_subclassrange_min.value, + r.ip.value) + # get the two bounds to check against + vtable_ptr = loc_check_against_class.getint() + vtable_ptr = rffi.cast(rclass.CLASSTYPE, vtable_ptr) + check_min = vtable_ptr.subclassrange_min + check_max = vtable_ptr.subclassrange_max + # check by doing the unsigned comparison (tmp - min) < (max - min) + self.mc.SUB_ri(r.ip.value, r.ip.value, check_min) + self.mc.CMP_ri(r.ip.value, check_max - check_min) + # the guard passes if we get a result of "below" + self.guard_success_cc = c.LO + self.implement_guard(guard_token) def emit_op_guard_not_invalidated(self, op, locs, regalloc, fcond): return self._emit_guard(op, locs, save_exc=False, @@ -365,8 +441,12 @@ self.gen_func_epilog() return fcond - def emit_op_call(self, op, arglocs, regalloc, fcond): + def _genop_call(self, op, arglocs, regalloc, fcond): return self._emit_call(op, arglocs, fcond=fcond) + emit_op_call_i = _genop_call + emit_op_call_r = _genop_call + emit_op_call_f = _genop_call + emit_op_call_n = _genop_call def _emit_call(self, op, arglocs, is_call_release_gil=False, fcond=c.AL): # args = [resloc, size, sign, args...] 
@@ -396,14 +476,17 @@ cb.emit() return fcond - def emit_op_same_as(self, op, arglocs, regalloc, fcond): + def _genop_same_as(self, op, arglocs, regalloc, fcond): argloc, resloc = arglocs if argloc is not resloc: self.mov_loc_loc(argloc, resloc) return fcond - emit_op_cast_ptr_to_int = emit_op_same_as - emit_op_cast_int_to_ptr = emit_op_same_as + emit_op_same_as_i = _genop_same_as + emit_op_same_as_r = _genop_same_as + emit_op_same_as_f = _genop_same_as + emit_op_cast_ptr_to_int = _genop_same_as + emit_op_cast_int_to_ptr = _genop_same_as def emit_op_guard_no_exception(self, op, arglocs, regalloc, fcond): loc = arglocs[0] @@ -574,7 +657,7 @@ emit_op_setfield_raw = emit_op_setfield_gc emit_op_zero_ptr_field = emit_op_setfield_gc - def emit_op_getfield_gc(self, op, arglocs, regalloc, fcond): + def _genop_getfield(self, op, arglocs, regalloc, fcond): base_loc, ofs, res, size = arglocs signed = op.getdescr().is_field_signed() scale = get_scale(size.value) @@ -592,7 +675,7 @@ self.mc.STR_ri(value_loc.value, base_loc.value, 0, cond=fcond) return fcond - def emit_op_getinteriorfield_gc(self, op, arglocs, regalloc, fcond): + def _genop_interiorfield(self, op, arglocs, regalloc, fcond): (base_loc, index_loc, res_loc, ofs_loc, ofs, itemsize, fieldsize) = arglocs scale = get_scale(fieldsize.value) @@ -613,6 +696,10 @@ imm(scale), signed, fcond) return fcond + emit_op_getinteriorfield_gc_i = _genop_getinteriorfield + emit_op_getinteriorfield_gc_r = _genop_getinteriorfield + emit_op_getinteriorfield_gc_f = _genop_getinteriorfield + def emit_op_setinteriorfield_gc(self, op, arglocs, regalloc, fcond): (base_loc, index_loc, value_loc, ofs_loc, ofs, itemsize, fieldsize) = arglocs @@ -697,12 +784,13 @@ self._write_to_mem(value_loc, base_loc, ofs_loc, scale, fcond) return fcond - def emit_op_getarrayitem_gc(self, op, arglocs, regalloc, fcond): + def _genop_getarrayitem(self, op, arglocs, regalloc, fcond): res_loc, base_loc, ofs_loc, scale, ofs = arglocs assert ofs_loc.is_core_reg() 
signed = op.getdescr().is_item_signed() # scale the offset as required + # XXX we should try to encode the scale inside the "shift" part of LDR if scale.value > 0: self.mc.LSL_ri(r.ip.value, ofs_loc.value, scale.value) ofs_loc = r.ip @@ -714,6 +802,17 @@ self._load_from_mem(res_loc, base_loc, ofs_loc, scale, signed, fcond) return fcond + emit_op_getarrayitem_gc_i = _genop_getarrayitem + emit_op_getarrayitem_gc_r = _genop_getarrayitem + emit_op_getarrayitem_gc_f = _genop_getarrayitem + emit_op_getarrayitem_gc_pure_i = _genop_getarrayitem + emit_op_getarrayitem_gc_pure_r = _genop_getarrayitem + emit_op_getarrayitem_gc_pure_f = _genop_getarrayitem + emit_op_getarrayitem_raw_i = _genop_getarrayitem + emit_op_getarrayitem_raw_f = _genop_getarrayitem + emit_op_getarrayitem_raw_pure_i = _genop_getarrayitem + emit_op_getarrayitem_raw_pure_f = _genop_getarrayitem + def _load_from_mem(self, res_loc, base_loc, ofs_loc, scale, signed=False, fcond=c.AL): if scale.value == 3: @@ -771,10 +870,7 @@ else: assert 0 - emit_op_getarrayitem_raw = emit_op_getarrayitem_gc - emit_op_getarrayitem_gc_pure = emit_op_getarrayitem_gc - - def emit_op_raw_load(self, op, arglocs, regalloc, fcond): + def _genop_raw_load(self, op, arglocs, regalloc, fcond): res_loc, base_loc, ofs_loc, scale, ofs = arglocs assert ofs_loc.is_core_reg() # no base offset @@ -783,6 +879,9 @@ self._load_from_mem(res_loc, base_loc, ofs_loc, scale, signed, fcond) return fcond + emit_op_raw_load_i = _genop_raw_load + emit_op_raw_load_f = _genop_raw_load + def emit_op_strlen(self, op, arglocs, regalloc, fcond): l0, l1, res = arglocs if l1.is_imm(): @@ -952,7 +1051,7 @@ def imm(self, v): return imm(v) - def emit_op_call_assembler(self, op, arglocs, regalloc, fcond): + def _genop_call_assembler(self, op, arglocs, regalloc, fcond): if len(arglocs) == 4: [argloc, vloc, result_loc, tmploc] = arglocs else: @@ -961,6 +1060,10 @@ self._store_force_index(self._find_nearby_operation(+1)) self.call_assembler(op, argloc, vloc, 
result_loc, tmploc) return fcond + emit_op_call_assembler_i = _genop_call_assembler + emit_op_call_assembler_r = _genop_call_assembler + emit_op_call_assembler_f = _genop_call_assembler + emit_op_call_assembler_n = _genop_call_assembler def _call_assembler_emit_call(self, addr, argloc, resloc): ofs = self.saved_threadlocal_addr @@ -991,9 +1094,9 @@ return pos def _call_assembler_load_result(self, op, result_loc): - if op.result is not None: + if op.type != 'v': # load the return value from (tmploc, 0) - kind = op.result.type + kind = op.type descr = self.cpu.getarraydescr_for_frame(kind) if kind == FLOAT: ofs = self.cpu.unpack_arraydescr(descr) @@ -1041,15 +1144,23 @@ self._emit_guard(op, arglocs, save_exc=True, is_guard_not_forced=True) return fcond - def emit_op_call_may_force(self, op, arglocs, regalloc, fcond): + def _genop_call_may_force(self, op, arglocs, regalloc, fcond): self._store_force_index(self._find_nearby_operation(+1)) self._emit_call(op, arglocs, fcond=fcond) return fcond + emit_op_call_may_force_i = _genop_call_may_force + emit_op_call_may_force_r = _genop_call_may_force + emit_op_call_may_force_f = _genop_call_may_force + emit_op_call_may_force_n = _genop_call_may_force - def emit_op_call_release_gil(self, op, arglocs, regalloc, fcond): + def _genop_call_release_gil(self, op, arglocs, regalloc, fcond): self._store_force_index(self._find_nearby_operation(+1)) self._emit_call(op, arglocs, is_call_release_gil=True) return fcond + emit_op_call_release_gil_i = _genop_call_release_gil + emit_op_call_release_gil_r = _genop_call_release_gil + emit_op_call_release_gil_f = _genop_call_release_gil + emit_op_call_release_gil_n = _genop_call_release_gil def _store_force_index(self, guard_op): assert (guard_op.getopnum() == rop.GUARD_NOT_FORCED or diff --git a/rpython/jit/backend/arm/regalloc.py b/rpython/jit/backend/arm/regalloc.py --- a/rpython/jit/backend/arm/regalloc.py +++ b/rpython/jit/backend/arm/regalloc.py @@ -24,8 +24,7 @@ from 
rpython.jit.backend.arm.arch import WORD, JITFRAME_FIXED_SIZE from rpython.jit.codewriter import longlong from rpython.jit.metainterp.history import (Const, ConstInt, ConstFloat, - ConstPtr, BoxInt, - Box, BoxPtr, + ConstPtr, INT, REF, FLOAT) from rpython.jit.metainterp.history import TargetToken from rpython.jit.metainterp.resoperation import rop @@ -689,8 +688,8 @@ arg0 = ConstInt(rffi.cast(lltype.Signed, op.getarg(0).getint())) loc = self.make_sure_var_in_reg(arg0) loc1 = self.get_scratch_reg(INT, boxes) - if op.result in self.longevity: - resloc = self.force_allocate_reg(op.result, boxes) + if op in self.longevity: + resloc = self.force_allocate_reg(op, boxes) self.possibly_free_var(op.result) else: resloc = None @@ -706,55 +705,23 @@ return arglocs def prepare_op_guard_class(self, op, fcond): - return self._prepare_guard_class(op, fcond) - - prepare_op_guard_nonnull_class = prepare_op_guard_class - - def _prepare_guard_class(self, op, fcond): assert not isinstance(op.getarg(0), Const) boxes = op.getarglist() x = self.make_sure_var_in_reg(boxes[0], boxes) y_val = rffi.cast(lltype.Signed, op.getarg(1).getint()) - arglocs = [x, None, None] + arglocs = [x, imm(y_val)] offset = self.cpu.vtable_offset if offset is not None: y = self.get_scratch_reg(INT, forbidden_vars=boxes) - self.assembler.load(y, imm(y_val)) - - assert check_imm_arg(offset) - offset_loc = imm(offset) - + self.assembler.load(y, arglocs[1]) arglocs[1] = y - arglocs[2] = offset_loc - else: - # XXX hard-coded assumption: to go from an object to its class - # we use the following algorithm: - # - read the typeid from mem(locs[0]), i.e. 
at offset 0 - # - keep the lower 16 bits read there - # - multiply by 4 and use it as an offset in type_info_group - # - add 16 bytes, to go past the TYPE_INFO structure - classptr = y_val - # here, we have to go back from 'classptr' to the value expected - # from reading the 16 bits in the object header - from rpython.memory.gctypelayout import GCData - sizeof_ti = rffi.sizeof(GCData.TYPE_INFO) - type_info_group = llop.gc_get_type_info_group(llmemory.Address) - type_info_group = rffi.cast(lltype.Signed, type_info_group) - expected_typeid = classptr - sizeof_ti - type_info_group - expected_typeid >>= 2 - if check_imm_arg(expected_typeid): - arglocs[1] = imm(expected_typeid) - else: - y = self.get_scratch_reg(INT, forbidden_vars=boxes) - self.assembler.load(y, imm(expected_typeid)) - arglocs[1] = y return self._prepare_guard(op, arglocs) - return arglocs + prepare_op_guard_nonnull_class = prepare_op_guard_class def compute_hint_frame_locations(self, operations): # optimization only: fill in the 'hint_frame_locations' dictionary @@ -782,7 +749,7 @@ assert len(arglocs) == jump_op.numargs() for i in range(jump_op.numargs()): box = jump_op.getarg(i) - if isinstance(box, Box): + if not isinstance(box, Const): loc = arglocs[i] if loc is not None and loc.is_stack(): self.frame_manager.hint_frame_pos[box] = ( @@ -1115,7 +1082,7 @@ # for boehm, this function should never be called arraydescr = op.getdescr() length_box = op.getarg(2) - assert isinstance(length_box, BoxInt) # we cannot have a const here! + assert not isinstance(length_box, Const) # we cannot have a const here! 
# the result will be in r0 self.rm.force_allocate_reg(op.result, selected_reg=r.r0) # we need r1 as a temporary @@ -1194,14 +1161,14 @@ # of some guard position = self.rm.position for arg in inputargs: - assert isinstance(arg, Box) + assert not isinstance(arg, Const) if self.last_real_usage.get(arg, -1) <= position: self.force_spill_var(arg) # for i in range(len(inputargs)): arg = inputargs[i] - assert isinstance(arg, Box) + assert not isinstance(arg, Const) loc = self.loc(arg) arglocs[i] = loc if loc.is_core_reg() or loc.is_vfp_reg(): _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit