Author: Philip Jenvey <pjen...@underboss.org> Branch: py3k Changeset: r74769:f682ccbce64f Date: 2014-11-30 14:12 -0800 http://bitbucket.org/pypy/pypy/changeset/f682ccbce64f/
Log: merge default diff too long, truncating to 2000 out of 3147 lines diff --git a/pypy/module/_cffi_backend/ctypeprim.py b/pypy/module/_cffi_backend/ctypeprim.py --- a/pypy/module/_cffi_backend/ctypeprim.py +++ b/pypy/module/_cffi_backend/ctypeprim.py @@ -158,21 +158,14 @@ class W_CTypePrimitiveSigned(W_CTypePrimitive): - _attrs_ = ['value_fits_long', 'vmin', 'vrangemax'] - _immutable_fields_ = ['value_fits_long', 'vmin', 'vrangemax'] + _attrs_ = ['value_fits_long', 'value_smaller_than_long'] + _immutable_fields_ = ['value_fits_long', 'value_smaller_than_long'] is_primitive_integer = True def __init__(self, *args): W_CTypePrimitive.__init__(self, *args) self.value_fits_long = self.size <= rffi.sizeof(lltype.Signed) - if self.size < rffi.sizeof(lltype.Signed): - assert self.value_fits_long - sh = self.size * 8 - self.vmin = r_uint(-1) << (sh - 1) - self.vrangemax = (r_uint(1) << sh) - 1 - else: - self.vmin = r_uint(0) - self.vrangemax = r_uint(-1) + self.value_smaller_than_long = self.size < rffi.sizeof(lltype.Signed) def cast_to_int(self, cdata): return self.convert_to_object(cdata) @@ -192,8 +185,17 @@ def convert_from_object(self, cdata, w_ob): if self.value_fits_long: value = misc.as_long(self.space, w_ob) - if self.size < rffi.sizeof(lltype.Signed): - if r_uint(value) - self.vmin > self.vrangemax: + if self.value_smaller_than_long: + size = self.size + if size == 1: + signextended = misc.signext(value, 1) + elif size == 2: + signextended = misc.signext(value, 2) + elif size == 4: + signextended = misc.signext(value, 4) + else: + raise AssertionError("unsupported size") + if value != signextended: self._overflow(w_ob) misc.write_raw_signed_data(cdata, value, self.size) else: @@ -221,7 +223,7 @@ length = w_cdata.get_array_length() populate_list_from_raw_array(res, buf, length) return res - elif self.value_fits_long: + elif self.value_smaller_than_long: res = [0] * w_cdata.get_array_length() misc.unpack_list_from_raw_array(res, w_cdata._cdata, self.size) return res @@ -235,8 +237,8 @@ cdata = rffi.cast(rffi.LONGP, cdata) copy_list_to_raw_array(int_list, cdata) else: - overflowed = misc.pack_list_to_raw_array_bounds( - int_list, cdata, self.size, self.vmin, self.vrangemax) + overflowed = misc.pack_list_to_raw_array_bounds_signed( + int_list, cdata, self.size) if overflowed != 0: self._overflow(self.space.wrap(overflowed)) return True @@ -314,8 +316,8 @@ def pack_list_of_items(self, cdata, w_ob): int_list = self.space.listview_int(w_ob) if int_list is not None: - overflowed = misc.pack_list_to_raw_array_bounds( - int_list, cdata, self.size, r_uint(0), self.vrangemax) + overflowed = misc.pack_list_to_raw_array_bounds_unsigned( + int_list, cdata, self.size, self.vrangemax) if overflowed != 0: self._overflow(self.space.wrap(overflowed)) return True diff --git a/pypy/module/_cffi_backend/misc.py b/pypy/module/_cffi_backend/misc.py --- a/pypy/module/_cffi_backend/misc.py +++ b/pypy/module/_cffi_backend/misc.py @@ -222,6 +222,19 @@ neg_msg = "can't convert negative number to unsigned" ovf_msg = "long too big to convert" +@specialize.arg(1) +def signext(value, size): + # 'value' is sign-extended from 'size' bytes to a full integer. + # 'size' should be a constant smaller than a full integer size. + if size == rffi.sizeof(rffi.SIGNEDCHAR): + return rffi.cast(lltype.Signed, rffi.cast(rffi.SIGNEDCHAR, value)) + elif size == rffi.sizeof(rffi.SHORT): + return rffi.cast(lltype.Signed, rffi.cast(rffi.SHORT, value)) + elif size == rffi.sizeof(rffi.INT): + return rffi.cast(lltype.Signed, rffi.cast(rffi.INT, value)) + else: + raise AssertionError("unsupported size") + # ____________________________________________________________ class _NotStandardObject(Exception): @@ -339,13 +352,26 @@ # ____________________________________________________________ -def pack_list_to_raw_array_bounds(int_list, target, size, vmin, vrangemax): +def pack_list_to_raw_array_bounds_signed(int_list, target, size): for TP, TPP in _prim_signed_types: if size == rffi.sizeof(TP): ptr = rffi.cast(TPP, target) for i in range(len(int_list)): x = int_list[i] - if r_uint(x) - vmin > vrangemax: + y = rffi.cast(TP, x) + if x != rffi.cast(lltype.Signed, y): + return x # overflow + ptr[i] = y + return 0 + raise NotImplementedError("bad integer size") + +def pack_list_to_raw_array_bounds_unsigned(int_list, target, size, vrangemax): + for TP, TPP in _prim_signed_types: + if size == rffi.sizeof(TP): + ptr = rffi.cast(TPP, target) + for i in range(len(int_list)): + x = int_list[i] + if r_uint(x) > vrangemax: return x # overflow ptr[i] = rffi.cast(TP, x) return 0 diff --git a/pypy/module/_ssl/thread_lock.py b/pypy/module/_ssl/thread_lock.py --- a/pypy/module/_ssl/thread_lock.py +++ b/pypy/module/_ssl/thread_lock.py @@ -24,12 +24,19 @@ separate_module_source = """ #include <openssl/crypto.h> +#ifndef _WIN32 +# include <pthread.h> +#endif static unsigned int _ssl_locks_count = 0; static struct RPyOpaque_ThreadLock *_ssl_locks; static unsigned long _ssl_thread_id_function(void) { - return RPyThreadGetIdent(); +#ifdef _WIN32 + return (unsigned long)GetCurrentThreadId(); +#else + return (unsigned long)pthread_self(); +#endif } static void _ssl_thread_locking_function(int mode, int n, const char *file, diff --git a/pypy/module/cpyext/src/pythread.c b/pypy/module/cpyext/src/pythread.c --- a/pypy/module/cpyext/src/pythread.c +++ b/pypy/module/cpyext/src/pythread.c @@ -1,11 +1,18 @@ #include <Python.h> +#ifndef _WIN32 +# include <pthread.h> +#endif #include "pythread.h" #include "src/thread.h" long PyThread_get_thread_ident(void) { - return RPyThreadGetIdent(); +#ifdef _WIN32 + return (long)GetCurrentThreadId(); +#else + return (long)pthread_self(); +#endif } PyThread_type_lock diff --git a/pypy/module/micronumpy/concrete.py b/pypy/module/micronumpy/concrete.py --- a/pypy/module/micronumpy/concrete.py +++ b/pypy/module/micronumpy/concrete.py @@ -47,6 +47,7 @@ def setitem(self, index, value): self.dtype.itemtype.store(self, index, 0, value) + @jit.unroll_safe def setslice(self, space, arr): if len(arr.get_shape()) > 0 and len(self.get_shape()) == 0: raise oefmt(space.w_ValueError, diff --git a/pypy/module/micronumpy/iterators.py b/pypy/module/micronumpy/iterators.py --- a/pypy/module/micronumpy/iterators.py +++ b/pypy/module/micronumpy/iterators.py @@ -154,7 +154,7 @@ index = state.index if self.track_index: index += 1 - indices = state.indices + indices = state.indices[:] offset = state.offset if self.contiguous: offset += self.array.dtype.elsize diff --git a/pypy/module/micronumpy/loop.py b/pypy/module/micronumpy/loop.py --- a/pypy/module/micronumpy/loop.py +++ b/pypy/module/micronumpy/loop.py @@ -88,6 +88,21 @@ reds = 'auto') def setslice(space, shape, target, source): + if not shape: + # XXX - simplify + target_iter, target_state = target.create_iter(shape) + source_iter, source_state = source.create_iter(shape) + dtype = target.dtype + val = source_iter.getitem(source_state) + if dtype.is_str_or_unicode(): + val = dtype.coerce(space, val) + else: + val = val.convert_to(space, dtype) + target_iter.setitem(target_state, val) + return target + return _setslice(space, shape, target, source) + +def _setslice(space, shape, target, source): # note that unlike everything else, target and source here are # array implementations, not arrays target_iter, target_state = target.create_iter(shape) diff --git a/pypy/module/micronumpy/nditer.py b/pypy/module/micronumpy/nditer.py --- a/pypy/module/micronumpy/nditer.py +++ b/pypy/module/micronumpy/nditer.py @@ -1,3 +1,4 @@ +from rpython.rlib import jit from pypy.interpreter.baseobjspace import W_Root from pypy.interpreter.typedef import TypeDef, GetSetProperty from pypy.interpreter.gateway import interp2app, unwrap_spec, WrappedDefault @@ -264,8 +265,8 @@ self.index = [0] * len(shape) self.backward = backward + @jit.unroll_safe def next(self): - # TODO It's probably possible to refactor all the "next" method from each iterator for i in range(len(self.shape) - 1, -1, -1): if self.index[i] < self.shape[i] - 1: self.index[i] += 1 diff --git a/pypy/module/micronumpy/test/test_zjit.py b/pypy/module/micronumpy/test/test_zjit.py --- a/pypy/module/micronumpy/test/test_zjit.py +++ b/pypy/module/micronumpy/test/test_zjit.py @@ -127,12 +127,13 @@ assert result == 3 ** 2 self.check_trace_count(1) self.check_simple_loop({ - 'call': 3, + 'call': 1, 'float_add': 1, 'float_eq': 3, 'float_mul': 2, 'float_ne': 1, 'getarrayitem_gc': 1, + 'getarrayitem_raw': 1, # read the errno 'guard_false': 4, 'guard_not_invalidated': 1, 'guard_true': 3, @@ -144,6 +145,7 @@ 'raw_load': 2, 'raw_store': 1, 'setarrayitem_gc': 1, + 'setarrayitem_raw': 1, # write the errno }) def define_pow_int(): diff --git a/pypy/module/pypyjit/test_pypy_c/model.py b/pypy/module/pypyjit/test_pypy_c/model.py --- a/pypy/module/pypyjit/test_pypy_c/model.py +++ b/pypy/module/pypyjit/test_pypy_c/model.py @@ -184,10 +184,10 @@ matcher = OpMatcher(ops) return matcher.match(expected_src, **kwds) - def match_by_id(self, id, expected_src, **kwds): + def match_by_id(self, id, expected_src, ignore_ops=[], **kwds): ops = list(self.ops_by_id(id, **kwds)) matcher = OpMatcher(ops, id) - return matcher.match(expected_src) + return matcher.match(expected_src, ignore_ops=ignore_ops) class PartialTraceWithIds(TraceWithIds): def __init__(self, trace, is_entry_bridge=False): diff --git a/pypy/module/pypyjit/test_pypy_c/test_call.py b/pypy/module/pypyjit/test_pypy_c/test_call.py --- a/pypy/module/pypyjit/test_pypy_c/test_call.py +++ b/pypy/module/pypyjit/test_pypy_c/test_call.py @@ -82,7 +82,7 @@ assert log.opnames(ops) == [] # assert entry_bridge.match_by_id('call', """ - p38 = call(ConstClass(_ll_0_threadlocalref_getter___), descr=<Callr . EF=1 OS=5>) + p38 = call(ConstClass(_ll_1_threadlocalref_get__Ptr_GcStruct_objectLlT_Signed), #, descr=<Callr . i EF=1 OS=5>) p39 = getfield_gc(p38, descr=<FieldP pypy.interpreter.executioncontext.ExecutionContext.inst_topframeref .*>) i40 = force_token() p41 = getfield_gc_pure(p38, descr=<FieldP pypy.interpreter.executioncontext.ExecutionContext.inst_w_tracefunc .*>) @@ -444,7 +444,7 @@ p26 = getfield_gc(p7, descr=<FieldP pypy.objspace.std.dictmultiobject.W_DictMultiObject.inst_strategy .*>) guard_value(p26, ConstPtr(ptr27), descr=...) guard_not_invalidated(descr=...) - p29 = call(ConstClass(_ll_0_threadlocalref_getter___), descr=<Callr . EF=1 OS=5>) + p29 = call(ConstClass(_ll_1_threadlocalref_get__Ptr_GcStruct_objectLlT_Signed), #, descr=<Callr . i EF=1 OS=5>) p30 = getfield_gc(p29, descr=<FieldP pypy.interpreter.executioncontext.ExecutionContext.inst_topframeref .*>) p31 = force_token() p32 = getfield_gc_pure(p29, descr=<FieldP pypy.interpreter.executioncontext.ExecutionContext.inst_w_tracefunc .*>) diff --git a/pypy/module/pypyjit/test_pypy_c/test_ffi.py b/pypy/module/pypyjit/test_pypy_c/test_ffi.py --- a/pypy/module/pypyjit/test_pypy_c/test_ffi.py +++ b/pypy/module/pypyjit/test_pypy_c/test_ffi.py @@ -199,21 +199,16 @@ ldexp_addr, res = log.result assert res == 8.0 * 300 loop, = log.loops_by_filename(self.filepath) - if 'ConstClass(ldexp)' in repr(loop): # e.g. OS/X - ldexp_addr = 'ConstClass(ldexp)' assert loop.match_by_id('cfficall', """ - ... - f1 = call_release_gil(..., descr=<Callf 8 fi EF=6 OS=62>) - ... - """) - ops = loop.ops_by_id('cfficall') - for name in ['raw_malloc', 'raw_free']: - assert name not in str(ops) - for name in ['raw_load', 'raw_store', 'getarrayitem_raw', 'setarrayitem_raw']: - assert name not in log.opnames(ops) - # so far just check that call_release_gil() is produced. - # later, also check that the arguments to call_release_gil() - # are constants + setarrayitem_raw(i69, 0, i95, descr=<ArrayS 4>) # write 'errno' + p96 = force_token() + setfield_gc(p0, p96, descr=<FieldP pypy.interpreter.pyframe.PyFrame.vable_token .>) + f97 = call_release_gil(i59, 1.0, 3, descr=<Callf 8 fi EF=6 OS=62>) + guard_not_forced(descr=...) + guard_no_exception(descr=...) + i98 = getarrayitem_raw(i69, 0, descr=<ArrayS 4>) # read 'errno' + setfield_gc(p65, i98, descr=<FieldS pypy.interpreter.executioncontext.ExecutionContext.inst__cffi_saved_errno .>) + """, ignore_ops=['guard_not_invalidated']) def test_cffi_call_guard_not_forced_fails(self): # this is the test_pypy_c equivalent of @@ -340,18 +335,16 @@ guard_value(p166, ConstPtr(ptr72), descr=...) p167 = call(ConstClass(_ll_0_alloc_with_del___), descr=<Callr . EF=4>) guard_no_exception(descr=...) - i112 = int_sub(i160, -32768) + i112 = int_signext(i160, 2) setfield_gc(p167, ConstPtr(null), descr=<FieldP pypy.module._cffi_backend.cdataobj.W_CData.inst__lifeline_ .+>) setfield_gc(p167, ConstPtr(ptr85), descr=<FieldP pypy.module._cffi_backend.cdataobj.W_CData.inst_ctype .+>) - i114 = uint_gt(i112, 65535) + i114 = int_ne(i160, i112) guard_false(i114, descr=...) - i115 = int_and(i112, 65535) - i116 = int_add(i115, -32768) --TICK-- i119 = call(ConstClass(_ll_1_raw_malloc_varsize__Signed), 6, descr=<Calli . i EF=4 OS=110>) - raw_store(i119, 0, i116, descr=<ArrayS 2>) - raw_store(i119, 2, i116, descr=<ArrayS 2>) - raw_store(i119, 4, i116, descr=<ArrayS 2>) + raw_store(i119, 0, i112, descr=<ArrayS 2>) + raw_store(i119, 2, i112, descr=<ArrayS 2>) + raw_store(i119, 4, i112, descr=<ArrayS 2>) setfield_gc(p167, i119, descr=<FieldU pypy.module._cffi_backend.cdataobj.W_CData.inst__cdata .+>) i123 = arraylen_gc(p67, descr=<ArrayP .>) jump(..., descr=...) diff --git a/rpython/annotator/bookkeeper.py b/rpython/annotator/bookkeeper.py --- a/rpython/annotator/bookkeeper.py +++ b/rpython/annotator/bookkeeper.py @@ -65,6 +65,7 @@ self.external_class_cache = {} # cache of ExternalType classes self.needs_generic_instantiate = {} + self.thread_local_fields = set() delayed_imports() diff --git a/rpython/jit/backend/arm/assembler.py b/rpython/jit/backend/arm/assembler.py --- a/rpython/jit/backend/arm/assembler.py +++ b/rpython/jit/backend/arm/assembler.py @@ -497,9 +497,11 @@ if self.cpu.supports_floats: mc.VPOP([reg.value for reg in r.callee_saved_vfp_registers], cond=cond) - # pop all callee saved registers and IP to keep the alignment + # pop all callee saved registers. This pops 'pc' last. + # It also pops the threadlocal_addr back into 'r1', but it + # is not needed any more and will be discarded. mc.POP([reg.value for reg in r.callee_restored_registers] + - [r.ip.value], cond=cond) + [r.r1.value], cond=cond) mc.BKPT() def gen_func_prolog(self): @@ -508,11 +510,16 @@ if self.cpu.supports_floats: stack_size += len(r.callee_saved_vfp_registers) * 2 * WORD - # push all callee saved registers and IP to keep the alignment + # push all callee saved registers including lr; and push r1 as + # well, which contains the threadlocal_addr argument. Note that + # we're pushing a total of 10 words, which keeps the stack aligned. self.mc.PUSH([reg.value for reg in r.callee_saved_registers] + - [r.ip.value]) + [r.r1.value]) + self.saved_threadlocal_addr = 0 # at offset 0 from location 'sp' if self.cpu.supports_floats: self.mc.VPUSH([reg.value for reg in r.callee_saved_vfp_registers]) + self.saved_threadlocal_addr += ( + len(r.callee_saved_vfp_registers) * 2 * WORD) assert stack_size % 8 == 0 # ensure we keep alignment # set fp to point to the JITFRAME @@ -952,16 +959,11 @@ regalloc._check_invariants() self.mc.mark_op(None) # end of the loop - def regalloc_emit_llong(self, op, arglocs, fcond, regalloc): + def regalloc_emit_extra(self, op, arglocs, fcond, regalloc): + # for calls to a function with a specifically-supported OS_xxx effectinfo = op.getdescr().get_extra_info() oopspecindex = effectinfo.oopspecindex - asm_llong_operations[oopspecindex](self, op, arglocs, regalloc, fcond) - return fcond - - def regalloc_emit_math(self, op, arglocs, fcond, regalloc): - effectinfo = op.getdescr().get_extra_info() - oopspecindex = effectinfo.oopspecindex - asm_math_operations[oopspecindex](self, op, arglocs, regalloc, fcond) + asm_extra_operations[oopspecindex](self, op, arglocs, regalloc, fcond) return fcond def patch_trace(self, faildescr, looptoken, bridge_addr, regalloc): @@ -1150,6 +1152,14 @@ else: assert 0, 'unsupported case' + def _mov_raw_sp_to_loc(self, prev_loc, loc, cond=c.AL): + if loc.is_core_reg(): + # load a value from 'SP + n' + assert prev_loc.value <= 0xFFF # not too far + self.load_reg(self.mc, loc, r.sp, prev_loc.value, cond=cond) + else: + assert 0, 'unsupported case' + def regalloc_mov(self, prev_loc, loc, cond=c.AL): """Moves a value from a previous location to some other location""" if prev_loc.is_imm(): @@ -1163,7 +1173,7 @@ elif prev_loc.is_vfp_reg(): self._mov_vfp_reg_to_loc(prev_loc, loc, cond) elif prev_loc.is_raw_sp(): - assert 0, 'raw sp locs are not supported as source loc' + self._mov_raw_sp_to_loc(prev_loc, loc, cond) else: assert 0, 'unsupported case' mov_loc_loc = regalloc_mov @@ -1509,22 +1519,17 @@ asm_operations = [notimplemented_op] * (rop._LAST + 1) asm_operations_with_guard = [notimplemented_op_with_guard] * (rop._LAST + 1) -asm_llong_operations = {} -asm_math_operations = {} +asm_extra_operations = {} for name, value in ResOpAssembler.__dict__.iteritems(): if name.startswith('emit_guard_'): opname = name[len('emit_guard_'):] num = getattr(rop, opname.upper()) asm_operations_with_guard[num] = value - elif name.startswith('emit_op_llong_'): - opname = name[len('emit_op_llong_'):] - num = getattr(EffectInfo, 'OS_LLONG_' + opname.upper()) - asm_llong_operations[num] = value - elif name.startswith('emit_op_math_'): - opname = name[len('emit_op_math_'):] - num = getattr(EffectInfo, 'OS_MATH_' + opname.upper()) - asm_math_operations[num] = value + elif name.startswith('emit_opx_'): + opname = name[len('emit_opx_'):] + num = getattr(EffectInfo, 'OS_' + opname.upper()) + asm_extra_operations[num] = value elif name.startswith('emit_op_'): opname = name[len('emit_op_'):] num = getattr(rop, opname.upper()) diff --git a/rpython/jit/backend/arm/codebuilder.py b/rpython/jit/backend/arm/codebuilder.py --- a/rpython/jit/backend/arm/codebuilder.py +++ b/rpython/jit/backend/arm/codebuilder.py @@ -318,6 +318,18 @@ | (rd & 0xF) << 12 | imm16 & 0xFFF) + def SXTB_rr(self, rd, rm, c=cond.AL): + self.write32(c << 28 + | 0x06AF0070 + | (rd & 0xF) << 12 + | (rm & 0xF)) + + def SXTH_rr(self, rd, rm, c=cond.AL): + self.write32(c << 28 + | 0x06BF0070 + | (rd & 0xF) << 12 + | (rm & 0xF)) + def LDREX(self, rt, rn, c=cond.AL): self.write32(c << 28 | 0x01900f9f diff --git a/rpython/jit/backend/arm/locations.py b/rpython/jit/backend/arm/locations.py --- a/rpython/jit/backend/arm/locations.py +++ b/rpython/jit/backend/arm/locations.py @@ -46,7 +46,7 @@ def is_core_reg(self): return True - def as_key(self): + def as_key(self): # 0 <= as_key <= 15 return self.value @@ -64,7 +64,7 @@ def is_vfp_reg(self): return True - def as_key(self): + def as_key(self): # 20 <= as_key <= 35 return self.value + 20 def is_float(self): @@ -115,8 +115,8 @@ def is_imm_float(self): return True - def as_key(self): - return self.value + def as_key(self): # a real address + 1 + return self.value | 1 def is_float(self): return True @@ -148,7 +148,7 @@ def is_stack(self): return True - def as_key(self): + def as_key(self): # an aligned word + 10000 return self.position + 10000 def is_float(self): @@ -174,6 +174,9 @@ def is_float(self): return self.type == FLOAT + def as_key(self): # a word >= 1000, and < 1000 + size of SP frame + return self.value + 1000 + def imm(i): return ImmLocation(i) diff --git a/rpython/jit/backend/arm/opassembler.py b/rpython/jit/backend/arm/opassembler.py --- a/rpython/jit/backend/arm/opassembler.py +++ b/rpython/jit/backend/arm/opassembler.py @@ -19,7 +19,7 @@ from rpython.jit.backend.arm.codebuilder import InstrBuilder, OverwritingBuilder from rpython.jit.backend.arm.jump import remap_frame_layout from rpython.jit.backend.arm.regalloc import TempBox -from rpython.jit.backend.arm.locations import imm +from rpython.jit.backend.arm.locations import imm, RawSPStackLocation from rpython.jit.backend.llsupport import symbolic from rpython.jit.backend.llsupport.gcmap import allocate_gcmap from rpython.jit.backend.llsupport.descr import InteriorFieldDescr @@ -102,6 +102,17 @@ self.mc.MOV_rr(res.value, arg.value, cond=c.GE) return fcond + def emit_op_int_signext(self, op, arglocs, regalloc, fcond): + arg, numbytes, res = arglocs + assert numbytes.is_imm() + if numbytes.value == 1: + self.mc.SXTB_rr(res.value, arg.value) + elif numbytes.value == 2: + self.mc.SXTH_rr(res.value, arg.value) + else: + raise AssertionError("bad number of bytes") + return fcond + #ref: http://blogs.arm.com/software-enablement/detecting-overflow-from-mul/ def emit_guard_int_mul_ovf(self, op, guard, arglocs, regalloc, fcond): reg1 = arglocs[0] @@ -971,7 +982,9 @@ return fcond def _call_assembler_emit_call(self, addr, argloc, resloc): - self.simple_call(addr, [argloc], result_loc=resloc) + ofs = self.saved_threadlocal_addr + threadlocal_loc = RawSPStackLocation(ofs, INT) + self.simple_call(addr, [argloc, threadlocal_loc], result_loc=resloc) def _call_assembler_emit_helper_call(self, addr, arglocs, resloc): self.simple_call(addr, arglocs, result_loc=resloc) @@ -1097,7 +1110,7 @@ emit_op_float_neg = gen_emit_unary_float_op('float_neg', 'VNEG') emit_op_float_abs = gen_emit_unary_float_op('float_abs', 'VABS') - emit_op_math_sqrt = gen_emit_unary_float_op('math_sqrt', 'VSQRT') + emit_opx_math_sqrt = gen_emit_unary_float_op('math_sqrt', 'VSQRT') emit_op_float_lt = gen_emit_float_cmp_op('float_lt', c.VFP_LT) emit_op_float_le = gen_emit_float_cmp_op('float_le', c.VFP_LE) @@ -1131,13 +1144,13 @@ # the following five instructions are only ARMv7; # regalloc.py won't call them at all on ARMv6 - emit_op_llong_add = gen_emit_float_op('llong_add', 'VADD_i64') - emit_op_llong_sub = gen_emit_float_op('llong_sub', 'VSUB_i64') - emit_op_llong_and = gen_emit_float_op('llong_and', 'VAND_i64') - emit_op_llong_or = gen_emit_float_op('llong_or', 'VORR_i64') - emit_op_llong_xor = gen_emit_float_op('llong_xor', 'VEOR_i64') + emit_opx_llong_add = gen_emit_float_op('llong_add', 'VADD_i64') + emit_opx_llong_sub = gen_emit_float_op('llong_sub', 'VSUB_i64') + emit_opx_llong_and = gen_emit_float_op('llong_and', 'VAND_i64') + emit_opx_llong_or = gen_emit_float_op('llong_or', 'VORR_i64') + emit_opx_llong_xor = gen_emit_float_op('llong_xor', 'VEOR_i64') - def emit_op_llong_to_int(self, op, arglocs, regalloc, fcond): + def emit_opx_llong_to_int(self, op, arglocs, regalloc, fcond): loc = arglocs[0] res = arglocs[1] assert loc.is_vfp_reg() @@ -1271,3 +1284,11 @@ regalloc.rm.possibly_free_var(length_box) regalloc.rm.possibly_free_var(dstaddr_box) return fcond + + def emit_opx_threadlocalref_get(self, op, arglocs, regalloc, fcond): + ofs0, res = arglocs + assert ofs0.is_imm() + ofs = self.saved_threadlocal_addr + self.load_reg(self.mc, res, r.sp, ofs) + self.load_reg(self.mc, res, res, ofs0.value) + return fcond diff --git a/rpython/jit/backend/arm/regalloc.py b/rpython/jit/backend/arm/regalloc.py --- a/rpython/jit/backend/arm/regalloc.py +++ b/rpython/jit/backend/arm/regalloc.py @@ -373,11 +373,8 @@ return gcmap # ------------------------------------------------------------ - def perform_llong(self, op, args, fcond): - return self.assembler.regalloc_emit_llong(op, args, fcond, self) - - def perform_math(self, op, args, fcond): - return self.assembler.regalloc_emit_math(op, args, self, fcond) + def perform_extra(self, op, args, fcond): + return self.assembler.regalloc_emit_extra(op, args, fcond, self) def force_spill_var(self, var): if var.type == FLOAT: @@ -458,6 +455,12 @@ resloc = self.force_allocate_reg(op.result, [op.getarg(0)]) return [argloc, resloc] + def prepare_op_int_signext(self, op, fcond): + argloc = self.make_sure_var_in_reg(op.getarg(0)) + numbytes = op.getarg(1).getint() + resloc = self.force_allocate_reg(op.result) + return [argloc, imm(numbytes), resloc] + def prepare_guard_int_mul_ovf(self, op, guard, fcond): boxes = op.getarglist() reg1 = self.make_sure_var_in_reg(boxes[0], forbidden_vars=boxes) @@ -552,15 +555,19 @@ EffectInfo.OS_LLONG_XOR): if self.cpu.cpuinfo.arch_version >= 7: args = self._prepare_llong_binop_xx(op, fcond) - self.perform_llong(op, args, fcond) + self.perform_extra(op, args, fcond) return elif oopspecindex == EffectInfo.OS_LLONG_TO_INT: args = self._prepare_llong_to_int(op, fcond) - self.perform_llong(op, args, fcond) + self.perform_extra(op, args, fcond) return elif oopspecindex == EffectInfo.OS_MATH_SQRT: - args = self.prepare_op_math_sqrt(op, fcond) - self.perform_math(op, args, fcond) + args = self._prepare_op_math_sqrt(op, fcond) + self.perform_extra(op, args, fcond) + return + elif oopspecindex == EffectInfo.OS_THREADLOCALREF_GET: + args = self._prepare_threadlocalref_get(op, fcond) + self.perform_extra(op, args, fcond) return #elif oopspecindex == EffectInfo.OS_MATH_READ_TIMESTAMP: # ... @@ -618,6 +625,11 @@ res = self.force_allocate_reg(op.result) return [loc0, res] + def _prepare_threadlocalref_get(self, op, fcond): + ofs0 = imm(op.getarg(1).getint()) + res = self.force_allocate_reg(op.result) + return [ofs0, res] + def _prepare_guard(self, op, args=None): if args is None: args = [] @@ -1278,7 +1290,7 @@ prepare_guard_float_ge = prepare_float_op(guard=True, float_result=False, name='prepare_guard_float_ge') - def prepare_op_math_sqrt(self, op, fcond): + def _prepare_op_math_sqrt(self, op, fcond): loc = self.make_sure_var_in_reg(op.getarg(1)) self.possibly_free_vars_for_op(op) self.free_temp_vars() diff --git a/rpython/jit/backend/llsupport/llmodel.py b/rpython/jit/backend/llsupport/llmodel.py --- a/rpython/jit/backend/llsupport/llmodel.py +++ b/rpython/jit/backend/llsupport/llmodel.py @@ -217,7 +217,13 @@ return lltype.cast_opaque_ptr(llmemory.GCREF, frame) def make_execute_token(self, *ARGS): - FUNCPTR = lltype.Ptr(lltype.FuncType([llmemory.GCREF], + # The JIT backend must generate functions with the following + # signature: it takes the jitframe and the threadlocal_addr + # as arguments, and it returns the (possibly reallocated) jitframe. + # The backend can optimize OS_THREADLOCALREF_GET calls to return a + # field of this threadlocal_addr, but only if 'translate_support_code': + # in untranslated tests, threadlocal_addr is a dummy NULL. + FUNCPTR = lltype.Ptr(lltype.FuncType([llmemory.GCREF, llmemory.Address], llmemory.GCREF)) lst = [(i, history.getkind(ARG)[0]) for i, ARG in enumerate(ARGS)] @@ -249,8 +255,13 @@ else: assert kind == history.REF self.set_ref_value(ll_frame, num, arg) + if self.translate_support_code: + ll_threadlocal_addr = llop.threadlocalref_addr( + llmemory.Address) + else: + ll_threadlocal_addr = llmemory.NULL llop.gc_writebarrier(lltype.Void, ll_frame) - ll_frame = func(ll_frame) + ll_frame = func(ll_frame, ll_threadlocal_addr) finally: if not self.translate_support_code: LLInterpreter.current_interpreter = prev_interpreter diff --git a/rpython/jit/backend/llsupport/test/ztranslation_test.py b/rpython/jit/backend/llsupport/test/ztranslation_test.py --- a/rpython/jit/backend/llsupport/test/ztranslation_test.py +++ b/rpython/jit/backend/llsupport/test/ztranslation_test.py @@ -26,8 +26,6 @@ # - profiler # - full optimizer # - floats neg and abs - # - threadlocalref_get - # - get_errno, set_errno # - llexternal with macro=True class Frame(object): @@ -36,10 +34,6 @@ def __init__(self, i): self.i = i - class Foo(object): - pass - t = ThreadLocalReference(Foo) - eci = ExternalCompilationInfo(post_include_bits=[''' #define pypy_my_fabs(x) fabs(x) ''']) @@ -74,9 +68,6 @@ k = myabs1(myabs2(j)) if k - abs(j): raise ValueError if k - abs(-j): raise ValueError - if t.get().nine != 9: raise ValueError - rposix.set_errno(total) - if rposix.get_errno() != total: raise ValueError return chr(total % 253) # class Virt2(object): @@ -104,12 +95,8 @@ return res # def main(i, j): - foo = Foo() - foo.nine = -(i + j) - t.set(foo) a_char = f(i, j) a_float = libffi_stuff(i, j) - keepalive_until_here(foo) return ord(a_char) * 10 + int(a_float) expected = main(40, -49) res = self.meta_interp(main, [40, -49]) @@ -121,6 +108,7 @@ def test_direct_assembler_call_translates(self): """Test CALL_ASSEMBLER and the recursion limit""" + # - also tests threadlocalref_get from rpython.rlib.rstackovf import StackOverflow class Thing(object): @@ -138,6 +126,10 @@ somewhere_else = SomewhereElse() + class Foo(object): + pass + t = ThreadLocalReference(Foo) + def change(newthing): somewhere_else.frame.thing = newthing @@ -163,6 +155,7 @@ nextval = 13 frame.thing = Thing(nextval + 1) i += 1 + if t.get().nine != 9: raise ValueError return frame.thing.val driver2 = JitDriver(greens = [], reds = ['n']) @@ -184,13 +177,24 @@ n = portal2(n) assert portal2(10) == -9 + def setup(value): + foo = Foo() + foo.nine = value + t.set(foo) + return foo + def mainall(codeno, bound): - return main(codeno) + main2(bound) + foo = setup(bound + 8) + result = main(codeno) + main2(bound) + keepalive_until_here(foo) + return result + tmp_obj = setup(9) + expected_1 = main(0) res = self.meta_interp(mainall, [0, 1], inline=True, policy=StopAtXPolicy(change)) print hex(res) - assert res & 255 == main(0) + assert res & 255 == expected_1 bound = res & ~255 assert 1024 <= bound <= 131072 assert bound & (bound-1) == 0 # a power of two diff --git a/rpython/jit/backend/test/runner_test.py b/rpython/jit/backend/test/runner_test.py --- a/rpython/jit/backend/test/runner_test.py +++ b/rpython/jit/backend/test/runner_test.py @@ -3890,6 +3890,26 @@ deadframe = self.cpu.execute_token(looptoken, inp) assert outp == self.cpu.get_int_value(deadframe, 0) + def test_int_signext(self): + numbytes_cases = [1, 2] if IS_32_BIT else [1, 2, 4] + for numbytes in numbytes_cases: + ops = """ + [i0] + i1 = int_signext(i0, %d) + finish(i1, descr=descr) + """ % numbytes + descr = BasicFinalDescr() + loop = parse(ops, self.cpu, namespace=locals()) + looptoken = JitCellToken() + self.cpu.compile_loop(loop.inputargs, loop.operations, looptoken) + test_cases = [random.randrange(-sys.maxint-1, sys.maxint+1) + for _ in range(100)] + for test_case in test_cases: + deadframe = self.cpu.execute_token(looptoken, test_case) + got = self.cpu.get_int_value(deadframe, 0) + expected = heaptracker.int_signext(test_case, numbytes) + assert got == expected + def test_compile_asmlen(self): from rpython.jit.backend.llsupport.llmodel import AbstractLLCPU if not isinstance(self.cpu, AbstractLLCPU): diff --git a/rpython/jit/backend/x86/arch.py b/rpython/jit/backend/x86/arch.py --- a/rpython/jit/backend/x86/arch.py +++ b/rpython/jit/backend/x86/arch.py @@ -34,10 +34,16 @@ FRAME_FIXED_SIZE = 19 PASS_ON_MY_FRAME = 15 JITFRAME_FIXED_SIZE = 6 + 8 * 2 # 6 GPR + 8 XMM * 2 WORDS/float + # 'threadlocal_addr' is passed as 2nd argument on the stack, + # and it can be left here for when it is needed + THREADLOCAL_OFS = (FRAME_FIXED_SIZE + 2) * WORD else: - # rbp + rbx + r12 + r13 + r14 + r15 + 13 extra words = 19 + # rbp + rbx + r12 + r13 + r14 + r15 + threadlocal + 12 extra words = 19 FRAME_FIXED_SIZE = 19 - PASS_ON_MY_FRAME = 13 + PASS_ON_MY_FRAME = 12 JITFRAME_FIXED_SIZE = 28 # 13 GPR + 15 XMM + # 'threadlocal_addr' is passed as 2nd argument in %esi, + # and is moved into this frame location + THREADLOCAL_OFS = (FRAME_FIXED_SIZE - 1) * WORD assert PASS_ON_MY_FRAME >= 12 # asmgcc needs at least JIT_USE_WORDS + 3 diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py --- a/rpython/jit/backend/x86/assembler.py +++ b/rpython/jit/backend/x86/assembler.py @@ -18,7 +18,7 @@ from rpython.jit.backend.llsupport.regalloc import (get_scale, valid_addressing_size) from rpython.jit.backend.x86.arch import (FRAME_FIXED_SIZE, WORD, IS_X86_64, JITFRAME_FIXED_SIZE, IS_X86_32, - PASS_ON_MY_FRAME) + PASS_ON_MY_FRAME, THREADLOCAL_OFS) from rpython.jit.backend.x86.regloc import (eax, ecx, edx, ebx, esp, ebp, esi, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, r8, r9, r10, r11, edi, r12, r13, r14, r15, X86_64_SCRATCH_REG, X86_64_XMM_SCRATCH_REG, @@ -730,6 +730,7 @@ self.mc.SUB_ri(esp.value, FRAME_FIXED_SIZE * WORD) self.mc.MOV_sr(PASS_ON_MY_FRAME * WORD, ebp.value) if IS_X86_64: + self.mc.MOV_sr(THREADLOCAL_OFS, esi.value) self.mc.MOV_rr(ebp.value, edi.value) else: self.mc.MOV_rs(ebp.value, (FRAME_FIXED_SIZE + 1) * WORD) @@ -1143,6 +1144,18 @@ def genop_math_sqrt(self, op, arglocs, resloc): self.mc.SQRTSD(arglocs[0], resloc) + def genop_int_signext(self, op, arglocs, resloc): + argloc, numbytesloc = arglocs + assert isinstance(numbytesloc, ImmedLoc) + if numbytesloc.value == 1: + self.mc.MOVSX8(resloc, argloc) + elif numbytesloc.value == 2: + self.mc.MOVSX16(resloc, argloc) + elif IS_X86_64 and numbytesloc.value == 4: + self.mc.MOVSX32(resloc, argloc) + else: + raise AssertionError("bad number of bytes") + def genop_guard_float_ne(self, op, guard_op, guard_token, arglocs, result_loc): guard_opnum = guard_op.getopnum() if isinstance(arglocs[0], RegLoc): @@ -1957,7 +1970,8 @@ self._emit_guard_not_forced(guard_token) def _call_assembler_emit_call(self, addr, argloc, _): - self.simple_call(addr, [argloc]) + threadlocal_loc = RawEspLoc(THREADLOCAL_OFS, INT) + self.simple_call(addr, [argloc, threadlocal_loc]) def _call_assembler_emit_helper_call(self, addr, arglocs, result_loc): self.simple_call(addr, arglocs, result_loc) @@ -2322,48 +2336,16 @@ assert isinstance(reg, RegLoc) self.mc.MOV_rr(reg.value, ebp.value) - def threadlocalref_get(self, op, resloc): - # this function is only called on Linux - from rpython.jit.codewriter.jitcode import ThreadLocalRefDescr - from rpython.jit.backend.x86 import stmtlocal + def threadlocalref_get(self, offset, resloc): + # This loads the stack location THREADLOCAL_OFS into a + # register, and then read the word at the given offset. + # It is only supported if 'translate_support_code' is + # true; otherwise, the original call to the piece of assembler + # was done with a dummy NULL value. + assert self.cpu.translate_support_code assert isinstance(resloc, RegLoc) - effectinfo = op.getdescr().get_extra_info() - assert effectinfo.extradescrs is not None - ed = effectinfo.extradescrs[0] - assert isinstance(ed, ThreadLocalRefDescr) - addr1 = rffi.cast(lltype.Signed, ed.get_tlref_addr()) - # 'addr1' is the address is the current thread, but we assume that - # it is a thread-local at a constant offset from %fs/%gs. - addr0 = stmtlocal.threadlocal_base() - addr = addr1 - addr0 - assert rx86.fits_in_32bits(addr) - mc = self.mc - mc.writechar(stmtlocal.SEGMENT_TL) # prefix: %fs or %gs - mc.MOV_rj(resloc.value, addr) # memory read - - def get_set_errno(self, op, loc, issue_a_write): - # this function is only called on Linux - from rpython.jit.backend.x86 import stmtlocal - addr = stmtlocal.get_errno_tl() - assert rx86.fits_in_32bits(addr) - mc = self.mc - mc.writechar(stmtlocal.SEGMENT_TL) # prefix: %fs or %gs - # !!important: the *next* instruction must be the one using 'addr'!! - if issue_a_write: - if isinstance(loc, RegLoc): - mc.MOV32_jr(addr, loc.value) # memory write from reg - else: - assert isinstance(loc, ImmedLoc) - newvalue = loc.value - newvalue = rffi.cast(rffi.INT, newvalue) - newvalue = rffi.cast(lltype.Signed, newvalue) - mc.MOV32_ji(addr, newvalue) # memory write immediate - else: - assert isinstance(loc, RegLoc) - if IS_X86_32: - mc.MOV_rj(loc.value, addr) # memory read - elif IS_X86_64: - mc.MOVSX32_rj(loc.value, addr) # memory read, sign-extend + self.mc.MOV_rs(resloc.value, THREADLOCAL_OFS) + self.mc.MOV_rm(resloc.value, (resloc.value, offset)) def genop_discard_zero_array(self, op, arglocs): (base_loc, startindex_loc, bytes_loc, diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py --- a/rpython/jit/backend/x86/regalloc.py +++ b/rpython/jit/backend/x86/regalloc.py @@ -474,6 +474,12 @@ consider_int_invert = consider_int_neg + def consider_int_signext(self, op): + argloc = self.loc(op.getarg(0)) + numbytesloc = self.loc(op.getarg(1)) + resloc = self.force_allocate_reg(op.result) + self.perform(op, [argloc, numbytesloc], resloc) + def consider_int_lshift(self, op): if isinstance(op.getarg(1), Const): loc2 = self.rm.convert_to_imm(op.getarg(1)) @@ -693,29 +699,11 @@ loc0 = self.xrm.force_result_in_reg(op.result, op.getarg(1)) self.perform_math(op, [loc0], loc0) - TLREF_SUPPORT = sys.platform.startswith('linux') - ERRNO_SUPPORT = sys.platform.startswith('linux') - def _consider_threadlocalref_get(self, op): - if self.TLREF_SUPPORT: + if self.translate_support_code: + offset = op.getarg(1).getint() # getarg(0) == 'threadlocalref_get' resloc = self.force_allocate_reg(op.result) - self.assembler.threadlocalref_get(op, resloc) - else: - self._consider_call(op) - - def _consider_get_errno(self, op): - if self.ERRNO_SUPPORT: - resloc = self.force_allocate_reg(op.result) - self.assembler.get_set_errno(op, resloc, issue_a_write=False) - else: - self._consider_call(op) - - def _consider_set_errno(self, op): - if self.ERRNO_SUPPORT: - # op.getarg(0) is the function set_errno; op.getarg(1) is - # the new errno value - loc0 = self.rm.make_sure_var_in_reg(op.getarg(1)) - self.assembler.get_set_errno(op, loc0, issue_a_write=True) + self.assembler.threadlocalref_get(offset, resloc) else: self._consider_call(op) @@ -798,10 +786,6 @@ return self._consider_math_sqrt(op) if oopspecindex == EffectInfo.OS_THREADLOCALREF_GET: return self._consider_threadlocalref_get(op) - if oopspecindex == EffectInfo.OS_GET_ERRNO: - return self._consider_get_errno(op) - if oopspecindex == EffectInfo.OS_SET_ERRNO: - return self._consider_set_errno(op) if oopspecindex == EffectInfo.OS_MATH_READ_TIMESTAMP: return self._consider_math_read_timestamp(op) self._consider_call(op) diff --git a/rpython/jit/backend/x86/stmtlocal.py b/rpython/jit/backend/x86/stmtlocal.py deleted file mode 100644 --- a/rpython/jit/backend/x86/stmtlocal.py +++ /dev/null @@ -1,43 +0,0 @@ -from rpython.rtyper.lltypesystem import lltype, rffi -from rpython.translator.tool.cbuild import ExternalCompilationInfo -from rpython.jit.backend.x86.arch import WORD - -SEGMENT_FS = '\x64' -SEGMENT_GS = '\x65' - -if WORD == 4: - SEGMENT_TL = SEGMENT_GS - _instruction = "movl %%gs:0, %0" -else: - SEGMENT_TL = SEGMENT_FS - _instruction = "movq %%fs:0, %0" - -eci = ExternalCompilationInfo(post_include_bits=[''' -#define RPY_STM_JIT 1 -static long pypy__threadlocal_base(void) -{ - /* XXX ONLY LINUX WITH GCC/CLANG FOR NOW XXX */ - long result; - asm("%s" : "=r"(result)); - return result; -} -static long pypy__get_errno_tl(void) -{ - return ((long)&errno) - pypy__threadlocal_base(); -} -''' % _instruction]) - - -threadlocal_base = rffi.llexternal( - 'pypy__threadlocal_base', - [], lltype.Signed, - compilation_info=eci, - _nowrapper=True, - ) #transactionsafe=True) - -get_errno_tl = rffi.llexternal( - 'pypy__get_errno_tl', - [], lltype.Signed, - compilation_info=eci, - _nowrapper=True, - ) #transactionsafe=True) diff --git a/rpython/jit/codewriter/assembler.py b/rpython/jit/codewriter/assembler.py --- a/rpython/jit/codewriter/assembler.py +++ b/rpython/jit/codewriter/assembler.py @@ -216,10 +216,11 @@ self.code[pos ] = chr(target & 0xFF) self.code[pos+1] = chr(target >> 8) for descr in self.switchdictdescrs: - descr.dict = {} + as_dict = {} for key, switchlabel in descr._labels: target = self.label_positions[switchlabel.name] - descr.dict[key] = target + as_dict[key] = target + descr.attach(as_dict) def check_result(self): # Limitation of the number of registers, from the single-byte encoding diff --git a/rpython/jit/codewriter/effectinfo.py b/rpython/jit/codewriter/effectinfo.py --- a/rpython/jit/codewriter/effectinfo.py +++ b/rpython/jit/codewriter/effectinfo.py @@ -23,8 +23,6 @@ OS_SHRINK_ARRAY = 3 # rgc.ll_shrink_array OS_DICT_LOOKUP = 4 # ll_dict_lookup OS_THREADLOCALREF_GET = 5 # llop.threadlocalref_get - OS_GET_ERRNO = 6 # rposix.get_errno - OS_SET_ERRNO = 7 # rposix.set_errno OS_NOT_IN_TRACE = 8 # for calls not recorded in the jit trace # OS_STR_CONCAT = 22 # "stroruni.concat" diff --git a/rpython/jit/codewriter/flatten.py b/rpython/jit/codewriter/flatten.py --- a/rpython/jit/codewriter/flatten.py +++ b/rpython/jit/codewriter/flatten.py @@ -243,55 +243,39 @@ else: # A switch. # - def emitdefaultpath(): - if block.exits[-1].exitcase == 'default': - self.make_link(block.exits[-1]) - else: - self.emitline("unreachable") - self.emitline("---") - # - self.emitline('-live-') switches = [link for link in block.exits if link.exitcase != 'default'] switches.sort(key=lambda link: link.llexitcase) kind = getkind(block.exitswitch.concretetype) - if len(switches) >= 5 and kind == 'int': - # A large switch on an integer, implementable efficiently - # with the help of a SwitchDictDescr - from rpython.jit.codewriter.jitcode import SwitchDictDescr - switchdict = SwitchDictDescr() - switchdict._labels = [] - self.emitline('switch', self.getcolor(block.exitswitch), - switchdict) - emitdefaultpath() - # - for switch in switches: - key = lltype.cast_primitive(lltype.Signed, - switch.llexitcase) - switchdict._labels.append((key, TLabel(switch))) - # emit code for that path - self.emitline(Label(switch)) - self.make_link(switch) + assert kind == 'int' # XXX # + # A switch on an integer, implementable efficiently with the + # help of a SwitchDictDescr. We use this even if there are + # very few cases: in pyjitpl.py, opimpl_switch() will promote + # the int only if it matches one of the cases. + from rpython.jit.codewriter.jitcode import SwitchDictDescr + switchdict = SwitchDictDescr() + switchdict._labels = [] + self.emitline('-live-') # for 'guard_value' + self.emitline('switch', self.getcolor(block.exitswitch), + switchdict) + # emit the default path + if block.exits[-1].exitcase == 'default': + self.make_link(block.exits[-1]) else: - # A switch with several possible answers, though not too - # many of them -- a chain of int_eq comparisons is fine - assert kind == 'int' # XXX - color = self.getcolor(block.exitswitch) - self.emitline('int_guard_value', color) - for switch in switches: - # make the case described by 'switch' - self.emitline('goto_if_not_int_eq', - color, - Constant(switch.llexitcase, - block.exitswitch.concretetype), - TLabel(switch)) - # emit code for the "taken" path - self.make_link(switch) - # finally, emit the label for the "non-taken" path - self.emitline(Label(switch)) - # - emitdefaultpath() + self.emitline("unreachable") + self.emitline("---") + # + for switch in switches: + key = lltype.cast_primitive(lltype.Signed, + switch.llexitcase) + switchdict._labels.append((key, TLabel(switch))) + # emit code for that path + # note: we need a -live- for all the 'guard_false' we produce + # if the switched value doesn't match any case. + self.emitline(Label(switch)) + self.emitline('-live-') + self.make_link(switch) def insert_renamings(self, link): renamings = {} diff --git a/rpython/jit/codewriter/heaptracker.py b/rpython/jit/codewriter/heaptracker.py --- a/rpython/jit/codewriter/heaptracker.py +++ b/rpython/jit/codewriter/heaptracker.py @@ -1,6 +1,7 @@ from rpython.rtyper.lltypesystem import lltype, llmemory from rpython.rtyper import rclass from rpython.rlib.objectmodel import we_are_translated +from rpython.rlib.rarithmetic import r_uint, intmask def adr2int(addr): @@ -11,6 +12,14 @@ def int2adr(int): return llmemory.cast_int_to_adr(int) +def int_signext(value, numbytes): + b8 = numbytes * 8 + a = r_uint(value) + a += r_uint(1 << (b8 - 1)) # a += 128 + a &= r_uint((1 << b8) - 1) # a &= 255 + a -= r_uint(1 << (b8 - 1)) # a -= 128 + return intmask(a) + def count_fields_if_immutable(STRUCT): assert isinstance(STRUCT, lltype.GcStruct) if STRUCT._hints.get('immutable', False): diff --git a/rpython/jit/codewriter/jitcode.py b/rpython/jit/codewriter/jitcode.py --- a/rpython/jit/codewriter/jitcode.py +++ b/rpython/jit/codewriter/jitcode.py @@ -1,4 +1,4 @@ -from rpython.jit.metainterp.history import AbstractDescr +from rpython.jit.metainterp.history import AbstractDescr, ConstInt from rpython.jit.codewriter import heaptracker from rpython.rlib.objectmodel import we_are_translated @@ -109,6 +109,10 @@ class SwitchDictDescr(AbstractDescr): "Get a 'dict' attribute mapping integer values to bytecode positions." + def attach(self, as_dict): + self.dict = as_dict + self.const_keys_in_order = map(ConstInt, sorted(as_dict.keys())) + def __repr__(self): dict = getattr(self, 'dict', '?') return '<SwitchDictDescr %s>' % (dict,) @@ -117,26 +121,6 @@ raise NotImplementedError -class ThreadLocalRefDescr(AbstractDescr): - # A special descr used as the extradescr in a call to a - # threadlocalref_get function. If the backend supports it, - # it can use this 'get_tlref_addr()' to get the address *in the - # current thread* of the thread-local variable. If, on the current - # platform, the "__thread" variables are implemented as an offset - # from some base register (e.g. %fs on x86-64), then the backend will - # immediately substract the current value of the base register. - # This gives an offset from the base register, and this can be - # written down in an assembler instruction to load the "__thread" - # variable from anywhere. - - def __init__(self, opaque_id): - from rpython.rtyper.lltypesystem.lloperation import llop - from rpython.rtyper.lltypesystem import llmemory - def get_tlref_addr(): - return llop.threadlocalref_getaddr(llmemory.Address, opaque_id) - self.get_tlref_addr = get_tlref_addr - - class LiveVarsInfo(object): def __init__(self, live_i, live_r, live_f): self.live_i = live_i diff --git a/rpython/jit/codewriter/jtransform.py b/rpython/jit/codewriter/jtransform.py --- a/rpython/jit/codewriter/jtransform.py +++ b/rpython/jit/codewriter/jtransform.py @@ -439,8 +439,6 @@ elif oopspec_name.endswith('dict.lookup'): # also ordereddict.lookup prepare = self._handle_dict_lookup_call - elif oopspec_name.startswith('rposix.'): - prepare = self._handle_rposix_call else: prepare = self.prepare_builtin_call try: @@ -1267,19 +1265,12 @@ result = [] if min2: - c_min2 = Constant(min2, lltype.Signed) - v2 = varoftype(lltype.Signed) - result.append(SpaceOperation('int_sub', [v_arg, c_min2], v2)) + c_bytes = Constant(size2, lltype.Signed) + result.append(SpaceOperation('int_signext', [v_arg, c_bytes], + v_result)) else: - v2 = v_arg - c_mask = Constant(int((1 << (8 * size2)) - 1), lltype.Signed) - if min2: - v3 = varoftype(lltype.Signed) - else: - v3 = v_result - result.append(SpaceOperation('int_and', [v2, c_mask], v3)) - if min2: - result.append(SpaceOperation('int_add', [v3, c_min2], v_result)) + c_mask = Constant(int((1 << (8 * size2)) - 1), lltype.Signed) + result.append(SpaceOperation('int_and', [v_arg, c_mask], v_result)) return result def _float_to_float_cast(self, v_arg, v_result): @@ -1986,16 +1977,6 @@ else: raise NotImplementedError(oopspec_name) - def _handle_rposix_call(self, op, oopspec_name, args): - if oopspec_name == 'rposix.get_errno': - return self._handle_oopspec_call(op, args, EffectInfo.OS_GET_ERRNO, - EffectInfo.EF_CANNOT_RAISE) - elif oopspec_name == 'rposix.set_errno': - return self._handle_oopspec_call(op, args, EffectInfo.OS_SET_ERRNO, - EffectInfo.EF_CANNOT_RAISE) - else: - raise NotImplementedError(oopspec_name) - def rewrite_op_ll_read_timestamp(self, op): op1 = self.prepare_builtin_call(op, "ll_read_timestamp", []) return self.handle_residual_call(op1, @@ -2012,16 +1993,15 @@ return [op0, op1] def rewrite_op_threadlocalref_get(self, op): - from rpython.jit.codewriter.jitcode import ThreadLocalRefDescr - opaqueid = op.args[0].value - op1 = self.prepare_builtin_call(op, 'threadlocalref_getter', [], - extra=(opaqueid,), - extrakey=opaqueid._obj) - extradescr = ThreadLocalRefDescr(opaqueid) + # only supports RESTYPE being exactly one word. + RESTYPE = op.result.concretetype + assert (RESTYPE in (lltype.Signed, lltype.Unsigned, llmemory.Address) + or isinstance(RESTYPE, lltype.Ptr)) + c_offset, = op.args + op1 = self.prepare_builtin_call(op, 'threadlocalref_get', [c_offset]) return self.handle_residual_call(op1, oopspecindex=EffectInfo.OS_THREADLOCALREF_GET, - extraeffect=EffectInfo.EF_LOOPINVARIANT, - extradescr=[extradescr]) + extraeffect=EffectInfo.EF_LOOPINVARIANT) # ____________________________________________________________ diff --git a/rpython/jit/codewriter/support.py b/rpython/jit/codewriter/support.py --- a/rpython/jit/codewriter/support.py +++ b/rpython/jit/codewriter/support.py @@ -702,10 +702,9 @@ build_ll_1_raw_free_no_track_allocation = ( build_raw_free_builder(track_allocation=False)) - def build_ll_0_threadlocalref_getter(opaqueid): - def _ll_0_threadlocalref_getter(): - return llop.threadlocalref_get(rclass.OBJECTPTR, opaqueid) - return _ll_0_threadlocalref_getter + def _ll_1_threadlocalref_get(TP, offset): + return llop.threadlocalref_get(TP, offset) + _ll_1_threadlocalref_get.need_result_type = 'exact' # don't deref def _ll_1_weakref_create(obj): return llop.weakref_create(llmemory.WeakRefPtr, obj) @@ -818,8 +817,18 @@ s_result = lltype_to_annotation(ll_res) impl = setup_extra_builtin(rtyper, oopspec_name, len(args_s), extra) if getattr(impl, 'need_result_type', False): - bk = rtyper.annotator.bookkeeper - args_s.insert(0, annmodel.SomePBC([bk.getdesc(deref(ll_res))])) + if hasattr(rtyper, 'annotator'): + bk = rtyper.annotator.bookkeeper + ll_restype = ll_res + if impl.need_result_type != 'exact': + ll_restype = deref(ll_restype) + desc = bk.getdesc(ll_restype) + else: + class TestingDesc(object): + knowntype = int + pyobj = None + desc = TestingDesc() + args_s.insert(0, annmodel.SomePBC([desc])) # if hasattr(rtyper, 'annotator'): # regular case mixlevelann = MixLevelHelperAnnotator(rtyper) diff --git a/rpython/jit/codewriter/test/test_flatten.py b/rpython/jit/codewriter/test/test_flatten.py --- a/rpython/jit/codewriter/test/test_flatten.py +++ b/rpython/jit/codewriter/test/test_flatten.py @@ -1,5 +1,6 @@ import py, sys from rpython.jit.codewriter import support +from rpython.jit.codewriter.heaptracker import int_signext from rpython.jit.codewriter.flatten import flatten_graph, reorder_renaming_list from rpython.jit.codewriter.flatten import GraphFlattener, ListOfKind, Register from rpython.jit.codewriter.format import assert_format @@ -281,30 +282,6 @@ foobar hi_there! """) - def test_switch(self): - def f(n): - if n == -5: return 12 - elif n == 2: return 51 - elif n == 7: return 1212 - else: return 42 - self.encoding_test(f, [65], """ - -live- - int_guard_value %i0 - goto_if_not_int_eq %i0, $-5, L1 - int_return $12 - --- - L1: - goto_if_not_int_eq %i0, $2, L2 - int_return $51 - --- - L2: - goto_if_not_int_eq %i0, $7, L3 - int_return $1212 - --- - L3: - int_return $42 - """) - def test_switch_dict(self): def f(x): if x == 1: return 61 @@ -320,21 +297,27 @@ int_return $-1 --- L1: + -live- int_return $61 --- L2: + -live- int_return $511 --- L3: + -live- int_return $-22 --- L4: + -live- int_return $81 --- L5: + -live- int_return $17 --- L6: + -live- int_return $54 """) @@ -780,53 +763,37 @@ (rffi.SIGNEDCHAR, rffi.LONG, ""), (rffi.SIGNEDCHAR, rffi.ULONG, ""), - (rffi.UCHAR, rffi.SIGNEDCHAR, """int_sub %i0, $-128 -> %i1 - int_and %i1, $255 -> %i2 - int_add %i2, $-128 -> %i3"""), + (rffi.UCHAR, rffi.SIGNEDCHAR, "int_signext %i0, $1 -> %i1"), (rffi.UCHAR, rffi.UCHAR, ""), (rffi.UCHAR, rffi.SHORT, ""), (rffi.UCHAR, rffi.USHORT, ""), (rffi.UCHAR, rffi.LONG, ""), (rffi.UCHAR, rffi.ULONG, ""), - (rffi.SHORT, rffi.SIGNEDCHAR, """int_sub %i0, $-128 -> %i1 - int_and %i1, $255 -> %i2 - int_add %i2, $-128 -> %i3"""), + (rffi.SHORT, rffi.SIGNEDCHAR, "int_signext %i0, $1 -> %i1"), (rffi.SHORT, rffi.UCHAR, "int_and %i0, $255 -> %i1"), (rffi.SHORT, rffi.SHORT, ""), (rffi.SHORT, rffi.USHORT, "int_and %i0, $65535 -> %i1"), (rffi.SHORT, rffi.LONG, ""), (rffi.SHORT, rffi.ULONG, ""), - (rffi.USHORT, rffi.SIGNEDCHAR, """int_sub %i0, $-128 -> %i1 - int_and %i1, $255 -> %i2 - int_add %i2, $-128 -> %i3"""), + (rffi.USHORT, rffi.SIGNEDCHAR, "int_signext %i0, $1 -> %i1"), (rffi.USHORT, rffi.UCHAR, "int_and %i0, $255 -> %i1"), - (rffi.USHORT, rffi.SHORT, """int_sub %i0, $-32768 -> %i1 - int_and %i1, $65535 -> %i2 - int_add %i2, $-32768 -> %i3"""), + (rffi.USHORT, rffi.SHORT, "int_signext %i0, $2 -> %i1"), (rffi.USHORT, rffi.USHORT, ""), (rffi.USHORT, rffi.LONG, ""), (rffi.USHORT, rffi.ULONG, ""), - (rffi.LONG, rffi.SIGNEDCHAR, """int_sub %i0, $-128 -> %i1 - int_and %i1, $255 -> %i2 - int_add %i2, $-128 -> %i3"""), + (rffi.LONG, rffi.SIGNEDCHAR, "int_signext %i0, $1 -> %i1"), (rffi.LONG, rffi.UCHAR, "int_and %i0, $255 -> %i1"), - (rffi.LONG, rffi.SHORT, """int_sub %i0, $-32768 -> %i1 - int_and %i1, $65535 -> %i2 - int_add %i2, $-32768 -> %i3"""), + (rffi.LONG, rffi.SHORT, "int_signext %i0, $2 -> %i1"), (rffi.LONG, rffi.USHORT, "int_and %i0, $65535 -> %i1"), (rffi.LONG, rffi.LONG, ""), (rffi.LONG, rffi.ULONG, ""), - (rffi.ULONG, rffi.SIGNEDCHAR, """int_sub %i0, $-128 -> %i1 - int_and %i1, $255 -> %i2 - int_add %i2, $-128 -> %i3"""), + (rffi.ULONG, rffi.SIGNEDCHAR, "int_signext %i0, $1 -> %i1"), (rffi.ULONG, rffi.UCHAR, "int_and %i0, $255 -> %i1"), - (rffi.ULONG, rffi.SHORT, """int_sub %i0, $-32768 -> %i1 - int_and %i1, $65535 -> %i2 - int_add %i2, $-32768 -> %i3"""), + (rffi.ULONG, rffi.SHORT, "int_signext %i0, $2 -> %i1"), (rffi.ULONG, rffi.USHORT, "int_and %i0, $65535 -> %i1"), (rffi.ULONG, rffi.LONG, ""), (rffi.ULONG, rffi.ULONG, ""), @@ -910,18 +877,14 @@ return rffi.cast(rffi.SIGNEDCHAR, n) self.encoding_test(f, [12.456], """ cast_float_to_int %f0 -> %i0 - int_sub %i0, $-128 -> %i1 - int_and %i1, $255 -> %i2 - int_add %i2, $-128 -> %i3 - int_return %i3 + int_signext %i0, $1 -> %i1 + int_return %i1 """, transform=True) self.encoding_test(f, [rffi.cast(lltype.SingleFloat, 12.456)], """ cast_singlefloat_to_float %i0 -> %f0 cast_float_to_int %f0 -> %i1 - int_sub %i1, $-128 -> %i2 - int_and %i2, $255 -> %i3 - int_add %i3, $-128 -> %i4 - int_return %i4 + int_signext %i1, $1 -> %i2 + int_return %i2 """, transform=True) def f(dbl): @@ -1068,9 +1031,12 @@ match = r.match(op) assert match, "line %r does not match regexp" % (op,) opname = match.group(1) - if opname == 'int_add': value += int(match.group(2)) - elif opname == 'int_sub': value -= int(match.group(2)) - elif opname == 'int_and': value &= int(match.group(2)) - else: assert 0, opname + if opname == 'int_and': + value &= int(match.group(2)) + elif opname == 'int_signext': + numbytes = int(match.group(2)) + value = int_signext(value, numbytes) + else: + assert 0, opname # assert rffi.cast(lltype.Signed, value) == expected_value diff --git a/rpython/jit/codewriter/test/test_jtransform.py b/rpython/jit/codewriter/test/test_jtransform.py --- a/rpython/jit/codewriter/test/test_jtransform.py +++ b/rpython/jit/codewriter/test/test_jtransform.py @@ -148,9 +148,7 @@ EI.OS_UNIEQ_LENGTHOK: ([PUNICODE, PUNICODE], INT), EI.OS_RAW_MALLOC_VARSIZE_CHAR: ([INT], ARRAYPTR), EI.OS_RAW_FREE: ([ARRAYPTR], lltype.Void), - EI.OS_THREADLOCALREF_GET: ([], rclass.OBJECTPTR), - EI.OS_GET_ERRNO: ([], INT), - EI.OS_SET_ERRNO: ([INT], lltype.Void), + EI.OS_THREADLOCALREF_GET: ([INT], INT), # for example } argtypes = argtypes[oopspecindex] assert argtypes[0] == [v.concretetype for v in op.args[1:]] @@ -159,9 +157,7 @@ assert extraeffect == EI.EF_ELIDABLE_CAN_RAISE elif oopspecindex == EI.OS_RAW_MALLOC_VARSIZE_CHAR: assert extraeffect == EI.EF_CAN_RAISE - elif oopspecindex in (EI.OS_RAW_FREE, - EI.OS_GET_ERRNO, - EI.OS_SET_ERRNO): + elif oopspecindex == EI.OS_RAW_FREE: assert extraeffect == EI.EF_CANNOT_RAISE elif oopspecindex == EI.OS_THREADLOCALREF_GET: assert extraeffect == EI.EF_LOOPINVARIANT @@ -1347,53 +1343,20 @@ assert op2 is None def test_threadlocalref_get(): - from rpython.rtyper import rclass - from rpython.rlib.rthread import ThreadLocalReference + from rpython.rlib.rthread import ThreadLocalField + tlfield = ThreadLocalField(lltype.Signed, 'foobar_test_') OS_THREADLOCALREF_GET = effectinfo.EffectInfo.OS_THREADLOCALREF_GET - class Foo: pass - t = ThreadLocalReference(Foo) - v2 = varoftype(rclass.OBJECTPTR) - c_opaqueid = const(t.opaque_id) - op = SpaceOperation('threadlocalref_get', [c_opaqueid], v2) + c = const(tlfield.offset) + v = varoftype(lltype.Signed) + op = SpaceOperation('threadlocalref_get', [c], v) tr = Transformer(FakeCPU(), FakeBuiltinCallControl()) op0 = tr.rewrite_operation(op) - assert op0.opname == 'residual_call_r_r' - assert op0.args[0].value == 'threadlocalref_getter' # pseudo-function as str - assert op0.args[1] == ListOfKind("ref", []) - assert op0.args[2] == 'calldescr-%d' % OS_THREADLOCALREF_GET - assert op0.result == v2 - -def test_get_errno(): - # test that the oopspec is present and correctly transformed - from rpython.rlib import rposix - FUNC = lltype.FuncType([], lltype.Signed) - func = lltype.functionptr(FUNC, 'get_errno', _callable=rposix.get_errno) - v3 = varoftype(lltype.Signed) - op = SpaceOperation('direct_call', [const(func)], v3) - tr = Transformer(FakeCPU(), FakeBuiltinCallControl()) - op1 = tr.rewrite_operation(op) - assert op1.opname == 'residual_call_r_i' - assert op1.args[0].value == func - assert op1.args[1] == ListOfKind('ref', []) - assert op1.args[2] == 'calldescr-%d' % effectinfo.EffectInfo.OS_GET_ERRNO - assert op1.result == v3 - -def test_set_errno(): - # test that the oopspec is present and correctly transformed - from rpython.rlib import rposix - FUNC = lltype.FuncType([lltype.Signed], lltype.Void) - func = lltype.functionptr(FUNC, 'set_errno', _callable=rposix.set_errno) - v1 = varoftype(lltype.Signed) - v3 = varoftype(lltype.Void) - op = SpaceOperation('direct_call', [const(func), v1], v3) - tr = Transformer(FakeCPU(), FakeBuiltinCallControl()) - op1 = tr.rewrite_operation(op) - assert op1.opname == 'residual_call_ir_v' - assert op1.args[0].value == func - assert op1.args[1] == ListOfKind('int', [v1]) - assert op1.args[2] == ListOfKind('ref', []) - assert op1.args[3] == 'calldescr-%d' % effectinfo.EffectInfo.OS_SET_ERRNO - assert op1.result == v3 + assert op0.opname == 'residual_call_ir_i' + assert op0.args[0].value == 'threadlocalref_get' # pseudo-function as str + assert op0.args[1] == ListOfKind("int", [c]) + assert op0.args[2] == ListOfKind("ref", []) + assert op0.args[3] == 'calldescr-%d' % OS_THREADLOCALREF_GET + assert op0.result == v def test_unknown_operation(): op = SpaceOperation('foobar', [], varoftype(lltype.Void)) diff --git a/rpython/jit/metainterp/blackhole.py b/rpython/jit/metainterp/blackhole.py --- a/rpython/jit/metainterp/blackhole.py +++ b/rpython/jit/metainterp/blackhole.py @@ -489,6 +489,9 @@ if i < 0: return 0 return i + @arguments("i", "i", returns="i") + def bhimpl_int_signext(a, b): + return heaptracker.int_signext(a, b) @arguments("i", "i", returns="i") def bhimpl_uint_lt(a, b): diff --git a/rpython/jit/metainterp/optimizeopt/intbounds.py b/rpython/jit/metainterp/optimizeopt/intbounds.py --- a/rpython/jit/metainterp/optimizeopt/intbounds.py +++ b/rpython/jit/metainterp/optimizeopt/intbounds.py @@ -342,6 +342,19 @@ else: self.emit_operation(op) + def optimize_INT_SIGNEXT(self, op): + value = self.getvalue(op.getarg(0)) + numbits = op.getarg(1).getint() * 8 + start = -(1 << (numbits - 1)) + stop = 1 << (numbits - 1) + bounds = IntBound(start, stop - 1) + if bounds.contains_bound(value.intbound): + self.make_equal_to(op.result, value) + else: + self.emit_operation(op) + vres = self.getvalue(op.result) + vres.intbound.intersect(bounds) + def optimize_ARRAYLEN_GC(self, op): self.emit_operation(op) array = self.getvalue(op.getarg(0)) diff --git a/rpython/jit/metainterp/optimizeopt/test/test_optimizebasic.py b/rpython/jit/metainterp/optimizeopt/test/test_optimizebasic.py --- a/rpython/jit/metainterp/optimizeopt/test/test_optimizebasic.py +++ b/rpython/jit/metainterp/optimizeopt/test/test_optimizebasic.py @@ -5494,6 +5494,41 @@ """ self.optimize_loop(ops, expected) + def test_int_signext_already_in_bounds(self): + ops = """ + [i0] + i1 = int_signext(i0, 1) + i2 = int_signext(i1, 2) + jump(i2) + """ + expected = """ + [i0] + i1 = int_signext(i0, 1) + jump(i1) + """ + self.optimize_loop(ops, expected) + # + ops = """ + [i0] + i1 = int_signext(i0, 1) + i2 = int_signext(i1, 1) + jump(i2) + """ + expected = """ + [i0] + i1 = int_signext(i0, 1) + jump(i1) + """ + self.optimize_loop(ops, expected) + # + ops = """ + [i0] + i1 = int_signext(i0, 2) + i2 = int_signext(i1, 1) + jump(i2) + """ + self.optimize_loop(ops, ops) + class TestLLtype(BaseTestOptimizeBasic, LLtypeMixin): pass diff --git a/rpython/jit/metainterp/optimizeopt/test/test_optimizeopt.py b/rpython/jit/metainterp/optimizeopt/test/test_optimizeopt.py --- a/rpython/jit/metainterp/optimizeopt/test/test_optimizeopt.py +++ b/rpython/jit/metainterp/optimizeopt/test/test_optimizeopt.py @@ -1,4 +1,4 @@ -import py +import py, sys from rpython.rlib.objectmodel import instantiate from rpython.jit.metainterp import compile, resume from rpython.jit.metainterp.history import AbstractDescr, ConstInt, BoxInt, TreeLoop @@ -190,6 +190,11 @@ args = [] for _ in range(oparity[opnum]): args.append(random.randrange(1, 20)) + if opnum == rop.INT_SIGNEXT: + # 2nd arg is number of bytes to extend from --- + # must not be too random + args[-1] = random.choice([1, 2] if sys.maxint < 2**32 else + [1, 2, 4]) ops = """ [] i1 = %s(%s) @@ -5607,6 +5612,44 @@ """ self.optimize_loop(ops, ops, ops) + def test_bound_backpropagate_int_signext(self): + ops = """ + [] + i0 = escape() + i1 = int_signext(i0, 1) + i2 = int_eq(i0, i1) + guard_true(i2) [] + i3 = int_le(i0, 127) # implied by equality with int_signext + guard_true(i3) [] + i5 = int_gt(i0, -129) # implied by equality with int_signext + guard_true(i5) [] + jump() + """ + expected = """ + [] + i0 = escape() + i1 = int_signext(i0, 1) + i2 = int_eq(i0, i1) + guard_true(i2) [] + jump() + """ + self.optimize_loop(ops, expected) + + def test_bound_backpropagate_int_signext_2(self): + ops = """ + [] + i0 = escape() + i1 = int_signext(i0, 1) + i2 = int_eq(i0, i1) + guard_true(i2) [] + i3 = int_le(i0, 126) # false for i1 == 127 + guard_true(i3) [] + i5 = int_gt(i0, -128) # false for i1 == -128 + guard_true(i5) [] + jump() + """ + self.optimize_loop(ops, ops) + def test_mul_ovf(self): ops = """ [i0, i1] diff --git a/rpython/jit/metainterp/pyjitpl.py b/rpython/jit/metainterp/pyjitpl.py --- a/rpython/jit/metainterp/pyjitpl.py +++ b/rpython/jit/metainterp/pyjitpl.py @@ -197,7 +197,7 @@ # ------------------------------ for _opimpl in ['int_add', 'int_sub', 'int_mul', 'int_floordiv', 'int_mod', - 'int_and', 'int_or', 'int_xor', + 'int_and', 'int_or', 'int_xor', 'int_signext', 'int_rshift', 'int_lshift', 'uint_rshift', 'uint_lt', 'uint_le', 'uint_gt', 'uint_ge', 'uint_floordiv', @@ -402,13 +402,26 @@ @arguments("box", "descr", "orgpc") def opimpl_switch(self, valuebox, switchdict, orgpc): - box = self.implement_guard_value(valuebox, orgpc) - search_value = box.getint() + search_value = valuebox.getint() assert isinstance(switchdict, SwitchDictDescr) try: - self.pc = switchdict.dict[search_value] + target = switchdict.dict[search_value] except KeyError: - pass + # None of the cases match. Fall back to generating a chain + # of 'int_eq'. + # xxx as a minor optimization, if that's a bridge, then we would + # not need the cases that we already tested (and failed) with + # 'guard_value'. How to do it is not very clear though. + for const1 in switchdict.const_keys_in_order: + box = self.execute(rop.INT_EQ, valuebox, const1) + assert box.getint() == 0 + target = switchdict.dict[const1.getint()] + self.metainterp.generate_guard(rop.GUARD_FALSE, box, + resumepc=target) + else: + # found one of the cases + self.implement_guard_value(valuebox, orgpc) + self.pc = target @arguments() def opimpl_unreachable(self): @@ -2270,8 +2283,8 @@ if opnum == rop.GUARD_TRUE: # a goto_if_not that jumps only now if not dont_change_position: frame.pc = frame.jitcode.follow_jump(frame.pc) - elif opnum == rop.GUARD_FALSE: # a goto_if_not that stops jumping - pass + elif opnum == rop.GUARD_FALSE: # a goto_if_not that stops jumping; + pass # or a switch that was in its "default" case elif opnum == rop.GUARD_VALUE or opnum == rop.GUARD_CLASS: pass # the pc is already set to the *start* of the opcode elif (opnum == rop.GUARD_NONNULL or diff --git a/rpython/jit/metainterp/resoperation.py b/rpython/jit/metainterp/resoperation.py --- a/rpython/jit/metainterp/resoperation.py +++ b/rpython/jit/metainterp/resoperation.py @@ -419,6 +419,7 @@ 'INT_RSHIFT/2', 'INT_LSHIFT/2', 'UINT_RSHIFT/2', + 'INT_SIGNEXT/2', 'FLOAT_ADD/2', 'FLOAT_SUB/2', 'FLOAT_MUL/2', diff --git a/rpython/jit/metainterp/test/test_ajit.py b/rpython/jit/metainterp/test/test_ajit.py --- a/rpython/jit/metainterp/test/test_ajit.py +++ b/rpython/jit/metainterp/test/test_ajit.py @@ -698,6 +698,40 @@ res = self.interp_operations(f, [12311]) assert res == 42 + def test_switch_bridges(self): + from rpython.rlib.rarithmetic import intmask + myjitdriver = JitDriver(greens = [], reds = 'auto') + lsts = [[-5, 2, 20] * 6, + [7, 123, 2] * 6, + [12311, -5, 7] * 6, + [7, 123, 2] * 4 + [-5, -5, -5] * 2, + [7, 123, 2] * 4 + [-5, -5, -5] * 2 + [12311, 12311, 12311], + ] + def f(case): + x = 0 + i = 0 + lst = lsts[case] + while i < len(lst): + myjitdriver.jit_merge_point() + n = lst[i] + if n == -5: a = 5 + elif n == 2: a = 4 + elif n == 7: a = 3 + else: a = 2 + x = intmask(x * 10 + a) + i += 1 + return x + res = self.meta_interp(f, [0], backendopt=True) + assert res == intmask(542 * 1001001001001001) + res = self.meta_interp(f, [1], backendopt=True) + assert res == intmask(324 * 1001001001001001) + res = self.meta_interp(f, [2], backendopt=True) + assert res == intmask(253 * 1001001001001001) + res = self.meta_interp(f, [3], backendopt=True) + assert res == intmask(324324324324555555) + res = self.meta_interp(f, [4], backendopt=True) + assert res == intmask(324324324324555555222) + def test_r_uint(self): from rpython.rlib.rarithmetic import r_uint myjitdriver = JitDriver(greens = [], reds = ['y']) @@ -833,23 +867,6 @@ assert type(res) == bool assert not res - def test_switch_dict(self): - def f(x): - if x == 1: return 61 - elif x == 2: return 511 - elif x == 3: return -22 - elif x == 4: return 81 - elif x == 5: return 17 - elif x == 6: return 54 - elif x == 7: return 987 - elif x == 8: return -12 - elif x == 9: return 321 - return -1 - res = self.interp_operations(f, [5]) - assert res == 17 - res = self.interp_operations(f, [15]) - assert res == -1 - def test_int_add_ovf(self): def f(x, y): try: @@ -3048,6 +3065,16 @@ res = self.meta_interp(f, [32]) assert res == f(32) + def test_int_signext(self): + def f(n): + return rffi.cast(rffi.SIGNEDCHAR, n) + res = self.interp_operations(f, [128]) + assert res == -128 + res = self.interp_operations(f, [-35 + 256 * 29]) + assert res == -35 + res = self.interp_operations(f, [127 - 256 * 29]) + assert res == 127 + class BaseLLtypeTests(BasicTests): def test_identityhash(self): diff --git a/rpython/jit/metainterp/test/test_threadlocal.py b/rpython/jit/metainterp/test/test_threadlocal.py --- a/rpython/jit/metainterp/test/test_threadlocal.py +++ b/rpython/jit/metainterp/test/test_threadlocal.py @@ -1,29 +1,21 @@ import py +from rpython.rlib import rthread from rpython.jit.metainterp.test.support import LLJitMixin -from rpython.rlib.rthread import ThreadLocalReference -from rpython.rlib.jit import dont_look_inside +from rpython.rtyper.lltypesystem import lltype +from rpython.rtyper.lltypesystem.lloperation import llop class ThreadLocalTest(object): def test_threadlocalref_get(self): - class Foo: - pass - t = ThreadLocalReference(Foo) - x = Foo() - - @dont_look_inside - def setup(): - t.set(x) _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit