Author: Maciej Fijalkowski <fij...@gmail.com> Branch: share-guard-info Changeset: r79871:be6fe02ea18a Date: 2015-09-27 20:01 +0200 http://bitbucket.org/pypy/pypy/changeset/be6fe02ea18a/
Log: merge default diff --git a/pypy/doc/extending.rst b/pypy/doc/extending.rst --- a/pypy/doc/extending.rst +++ b/pypy/doc/extending.rst @@ -5,8 +5,8 @@ with any external library. Right now, there are the following possibilities of providing -third-party modules for the PyPy python interpreter (in order of -usefulness): +third-party modules for the PyPy python interpreter (in order, from most +directly useful to most messy to use with PyPy): * Write them in pure Python and use CFFI_. diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -33,3 +33,13 @@ .. branch: remember-tracing-counts Reenable jithooks + +.. branch: detect_egd2 + +.. branch: shadowstack-no-move-2 +Issue #2141: fix a crash on Windows and OS/X and ARM when running +at least 20 threads. + +.. branch: numpy-ctypes + +Add support for ndarray.ctypes property. diff --git a/pypy/module/_cffi_backend/ctypeprim.py b/pypy/module/_cffi_backend/ctypeprim.py --- a/pypy/module/_cffi_backend/ctypeprim.py +++ b/pypy/module/_cffi_backend/ctypeprim.py @@ -125,12 +125,24 @@ cdata[0] = value +# XXX explicitly use an integer type instead of lltype.UniChar here, +# because for now the latter is defined as unsigned by RPython (even +# though it may be signed when 'wchar_t' is written to C). +WCHAR_INT = {(2, False): rffi.USHORT, + (4, False): rffi.UINT, + (4, True): rffi.INT}[rffi.sizeof(lltype.UniChar), rffi.r_wchar_t.SIGN] +WCHAR_INTP = rffi.CArrayPtr(WCHAR_INT) + class W_CTypePrimitiveUniChar(W_CTypePrimitiveCharOrUniChar): _attrs_ = [] + if rffi.r_wchar_t.SIGN: + def write_raw_integer_data(self, w_cdata, value): + w_cdata.write_raw_signed_data(value) + def cast_to_int(self, cdata): - unichardata = rffi.cast(rffi.CWCHARP, cdata) - return self.space.wrap(ord(unichardata[0])) + unichardata = rffi.cast(WCHAR_INTP, cdata) + return self.space.wrap(unichardata[0]) def convert_to_object(self, cdata): unichardata = rffi.cast(rffi.CWCHARP, cdata) diff --git a/pypy/module/_vmprof/test/test__vmprof.py b/pypy/module/_vmprof/test/test__vmprof.py --- a/pypy/module/_vmprof/test/test__vmprof.py +++ b/pypy/module/_vmprof/test/test__vmprof.py @@ -34,6 +34,7 @@ i += 1 _, size = struct.unpack("ll", s[i:i + 2 * WORD]) i += 2 * WORD + size * struct.calcsize("P") + i += WORD # thread id elif s[i] == '\x02': i += 1 _, size = struct.unpack("ll", s[i:i + 2 * WORD]) diff --git a/pypy/module/micronumpy/__init__.py b/pypy/module/micronumpy/__init__.py --- a/pypy/module/micronumpy/__init__.py +++ b/pypy/module/micronumpy/__init__.py @@ -9,6 +9,7 @@ 'ndarray': 'ndarray.W_NDimArray', 'dtype': 'descriptor.W_Dtype', 'flatiter': 'flatiter.W_FlatIterator', + 'flagsobj': 'flagsobj.W_FlagsObject', '_reconstruct' : 'ndarray._reconstruct', 'scalar' : 'ctors.build_scalar', diff --git a/pypy/module/micronumpy/boxes.py b/pypy/module/micronumpy/boxes.py --- a/pypy/module/micronumpy/boxes.py +++ b/pypy/module/micronumpy/boxes.py @@ -147,7 +147,7 @@ def get_flags(self): return (NPY.ARRAY_C_CONTIGUOUS | NPY.ARRAY_F_CONTIGUOUS | - NPY.ARRAY_WRITEABLE | NPY.ARRAY_OWNDATA) + NPY.ARRAY_ALIGNED | NPY.ARRAY_OWNDATA) def item(self, space): return self.get_dtype(space).itemtype.to_builtin_type(space, self) diff --git a/pypy/module/micronumpy/concrete.py b/pypy/module/micronumpy/concrete.py --- a/pypy/module/micronumpy/concrete.py +++ b/pypy/module/micronumpy/concrete.py @@ -1,6 +1,7 @@ from pypy.interpreter.error import OperationError, oefmt from rpython.rlib import jit, rgc from rpython.rlib.rarithmetic import ovfcheck +from rpython.rlib.listsort import make_timsort_class from rpython.rlib.buffer import Buffer from rpython.rlib.debug import make_sure_not_resized from rpython.rlib.rawstorage import alloc_raw_storage, free_raw_storage, \ @@ -17,6 +18,19 @@ is_f_contiguous) from rpython.rlib.objectmodel import keepalive_until_here +TimSort = make_timsort_class() +class StrideSort(TimSort): + ''' + argsort (return the indices to sort) a list of strides + ''' + def __init__(self, rangelist, strides): + self.strides = strides + TimSort.__init__(self, rangelist) + + def lt(self, a, b): + return self.strides[a] < self.strides[b] + + class BaseConcreteArray(object): _immutable_fields_ = ['dtype?', 'storage', 'start', 'size', 'shape[*]', 'strides[*]', 'backstrides[*]', 'order', 'gcstruct', @@ -354,12 +368,15 @@ elif order != self.order: t_strides, backstrides = calc_strides(shape, dtype, order) else: - mins = strides[0] + indx_array = range(len(strides)) + list_sorter = StrideSort(indx_array, strides) + list_sorter.sort() t_elsize = dtype.elsize - for s in strides: - if s < mins: - mins = s - t_strides = [s * t_elsize / mins for s in strides] + t_strides = strides[:] + base = dtype.elsize + for i in indx_array: + t_strides[i] = base + base *= shape[i] backstrides = calc_backstrides(t_strides, shape) impl = ConcreteArray(shape, dtype, order, t_strides, backstrides) loop.setslice(space, impl.get_shape(), impl, self) @@ -551,6 +568,11 @@ self.size = ovfcheck(support.product_check(shape) * self.dtype.elsize) except OverflowError: raise oefmt(dtype.itemtype.space.w_ValueError, "array is too big.") + while orig_arr is not None: + assert isinstance(orig_arr, W_NDimArray) + if orig_arr.implementation.base() is None: + break + orig_arr = orig_arr.implementation.base() self.start = start self.orig_arr = orig_arr flags = parent.flags & NPY.ARRAY_ALIGNED diff --git a/pypy/module/micronumpy/flagsobj.py b/pypy/module/micronumpy/flagsobj.py --- a/pypy/module/micronumpy/flagsobj.py +++ b/pypy/module/micronumpy/flagsobj.py @@ -57,6 +57,9 @@ self.flags & NPY.ARRAY_F_CONTIGUOUS or self.flags & NPY.ARRAY_C_CONTIGUOUS )) + def descr_get_num(self, space): + return space.wrap(self.flags) + def descr_getitem(self, space, w_item): key = space.str_w(w_item) if key == "C" or key == "CONTIGUOUS" or key == "C_CONTIGUOUS": @@ -122,4 +125,5 @@ aligned = GetSetProperty(W_FlagsObject.descr_get_aligned), fnc = GetSetProperty(W_FlagsObject.descr_get_fnc), forc = GetSetProperty(W_FlagsObject.descr_get_forc), + num = GetSetProperty(W_FlagsObject.descr_get_num), ) diff --git a/pypy/module/micronumpy/ndarray.py b/pypy/module/micronumpy/ndarray.py --- a/pypy/module/micronumpy/ndarray.py +++ b/pypy/module/micronumpy/ndarray.py @@ -747,8 +747,12 @@ return out def descr_get_ctypes(self, space): - raise OperationError(space.w_NotImplementedError, space.wrap( - "ctypes not implemented yet")) + w_result = space.appexec([self], """(arr): + from numpy.core import _internal + p_data = arr.__array_interface__['data'][0] + return _internal._ctypes(arr, p_data) + """) + return w_result def buffer_w(self, space, flags): return self.implementation.get_buffer(space, True) diff --git a/pypy/module/micronumpy/test/test_flagsobj.py b/pypy/module/micronumpy/test/test_flagsobj.py --- a/pypy/module/micronumpy/test/test_flagsobj.py +++ b/pypy/module/micronumpy/test/test_flagsobj.py @@ -30,6 +30,7 @@ assert a.flags.forc == True assert a.flags['FNC'] == False assert a.flags['FORC'] == True + assert a.flags.num == 1287 raises(KeyError, "a.flags['blah']") raises(KeyError, "a.flags['C_CONTIGUOUS'] = False") raises((TypeError, AttributeError), "a.flags.c_contiguous = False") @@ -38,6 +39,7 @@ import numpy as np a = np.int32(2) assert a.flags.c_contiguous == True + assert a.flags.num == 263 def test_compare(self): import numpy as np diff --git a/pypy/module/micronumpy/test/test_ndarray.py b/pypy/module/micronumpy/test/test_ndarray.py --- a/pypy/module/micronumpy/test/test_ndarray.py +++ b/pypy/module/micronumpy/test/test_ndarray.py @@ -2218,7 +2218,7 @@ assert _weakref.ref(a) def test_astype(self): - from numpy import array, arange + from numpy import array, arange, empty b = array(1).astype(float) assert b == 1 assert b.dtype == float @@ -2273,14 +2273,36 @@ b = a.astype('f4', order='C', copy=False) assert a is b + a = empty([3, 3, 3, 3], 'uint8') + a[:] = 0 + b = a[2] + c = b[:, :2, :] + d = c.swapaxes(1, -1) + e = d.astype('complex128') + assert e.shape == (3, 3, 2) + assert e.strides == (96, 16, 48) + assert (e.real == d).all() + def test_base(self): - from numpy import array + from numpy import array, empty assert array(1).base is None assert array([1, 2]).base is None a = array([1, 2, 3, 4]) b = a[::2] assert b.base is a + a = empty([3, 3, 3, 3], 'uint8') + a[:] = 0 + b = a[2] + assert b.base.base is None + c = b[:, :2, :] + d = c.swapaxes(1, -1) + assert c.base.base is None + assert d.base.base is None + assert d.shape == (3, 3, 2) + assert d.__array_interface__['data'][0] == \ + a.__array_interface__['data'][0] + a.strides[0] * 2 + def test_byteswap(self): from numpy import array diff --git a/pypy/module/thread/test/test_lock.py b/pypy/module/thread/test/test_lock.py --- a/pypy/module/thread/test/test_lock.py +++ b/pypy/module/thread/test/test_lock.py @@ -123,23 +123,26 @@ self.sig_recvd = True old_handler = signal.signal(signal.SIGUSR1, my_handler) try: + ready = thread.allocate_lock() + ready.acquire() def other_thread(): # Acquire the lock in a non-main thread, so this test works for # RLocks. lock.acquire() - # Wait until the main thread is blocked in the lock acquire, and - # then wake it up with this. - time.sleep(0.5) + # Notify the main thread that we're ready + ready.release() + # Wait for 5 seconds here + for n in range(50): + time.sleep(0.1) + # Send the signal os.kill(os.getpid(), signal.SIGUSR1) # Let the main thread take the interrupt, handle it, and retry # the lock acquisition. Then we'll let it run. - time.sleep(0.5) + for n in range(50): + time.sleep(0.1) lock.release() thread.start_new_thread(other_thread, ()) - # Wait until we can't acquire it without blocking... - while lock.acquire(blocking=False): - lock.release() - time.sleep(0.01) + ready.acquire() result = lock.acquire() # Block while we receive a signal. assert self.sig_recvd assert result diff --git a/pypy/objspace/std/floatobject.py b/pypy/objspace/std/floatobject.py --- a/pypy/objspace/std/floatobject.py +++ b/pypy/objspace/std/floatobject.py @@ -4,6 +4,7 @@ from rpython.rlib import rarithmetic, rfloat from rpython.rlib.rarithmetic import LONG_BIT, intmask, ovfcheck_float_to_int +from rpython.rlib.rarithmetic import int_between from rpython.rlib.rbigint import rbigint from rpython.rlib.rfloat import ( DTSF_ADD_DOT_0, DTSF_STR_PRECISION, INFINITY, NAN, copysign, @@ -121,10 +122,11 @@ if space.isinstance_w(w_other, space.w_int): f1 = self.floatval i2 = space.int_w(w_other) - f2 = float(i2) - if LONG_BIT > 32 and int(f2) != i2: + # (double-)floats have always at least 48 bits of precision + if LONG_BIT > 32 and not int_between((-1)<<48, i2, 1<<48): res = do_compare_bigint(f1, rbigint.fromint(i2)) else: + f2 = float(i2) res = op(f1, f2) return space.newbool(res) if space.isinstance_w(w_other, space.w_long): diff --git a/pypy/objspace/std/test/test_floatobject.py b/pypy/objspace/std/test/test_floatobject.py --- a/pypy/objspace/std/test/test_floatobject.py +++ b/pypy/objspace/std/test/test_floatobject.py @@ -840,3 +840,12 @@ check(mod(0.0, -1.0), -0.0) check(mod(1e-100, -1.0), -1.0) check(mod(1.0, -1.0), -0.0) + + def test_equality_rounding(self): + i = int(2 ** 63 - 1) + f = float(i) # not enough precision, becomes 2.0 ** 63 + assert f == 2.0 ** 63 + assert i != f + assert f != i + assert long(i) != f + assert f != long(i) diff --git a/rpython/jit/metainterp/optimizeopt/heap.py b/rpython/jit/metainterp/optimizeopt/heap.py --- a/rpython/jit/metainterp/optimizeopt/heap.py +++ b/rpython/jit/metainterp/optimizeopt/heap.py @@ -72,7 +72,7 @@ def do_setfield(self, optheap, op): # Update the state with the SETFIELD_GC/SETARRAYITEM_GC operation 'op'. structinfo = optheap.ensure_ptr_info_arg0(op) - arg1 = optheap.get_box_replacement(op.getarg(1)) + arg1 = optheap.get_box_replacement(self._getvalue(op)) if self.possible_aliasing(optheap, structinfo): self.force_lazy_setfield(optheap, op.getdescr()) assert not self.possible_aliasing(optheap, structinfo) diff --git a/rpython/jit/metainterp/optimizeopt/optimizer.py b/rpython/jit/metainterp/optimizeopt/optimizer.py --- a/rpython/jit/metainterp/optimizeopt/optimizer.py +++ b/rpython/jit/metainterp/optimizeopt/optimizer.py @@ -2,7 +2,7 @@ from rpython.jit.metainterp.executor import execute_nonspec_const from rpython.jit.metainterp.history import Const, ConstInt, ConstPtr from rpython.jit.metainterp.optimizeopt.intutils import IntBound,\ - ConstIntBound, MININT, MAXINT + ConstIntBound, MININT, MAXINT, IntUnbounded from rpython.jit.metainterp.optimizeopt.util import make_dispatcher_method from rpython.jit.metainterp.resoperation import rop, AbstractResOp, GuardResOp,\ OpHelpers, ResOperation @@ -57,9 +57,11 @@ if isinstance(op, ConstInt): return ConstIntBound(op.getint()) fw = op.get_forwarded() - if isinstance(fw, IntBound): - return fw - assert fw is None + if fw is not None: + if isinstance(fw, IntBound): + return fw + # rare case: fw might be a RawBufferPtrInfo + return IntUnbounded() assert op.type == 'i' intbound = IntBound(MININT, MAXINT) op.set_forwarded(intbound) @@ -72,7 +74,8 @@ return cur = op.get_forwarded() if cur is not None: - cur.intersect(bound) + if isinstance(cur, IntBound): + cur.intersect(bound) else: op.set_forwarded(bound) @@ -406,7 +409,8 @@ box = self.get_box_replacement(box) if not we_are_translated(): # safety-check if (box.get_forwarded() is not None and - isinstance(constbox, ConstInt)): + isinstance(constbox, ConstInt) and + not isinstance(box.get_forwarded(), info.AbstractRawPtrInfo)): assert box.get_forwarded().contains(constbox.getint()) if box.is_constant(): return diff --git a/rpython/jit/metainterp/optimizeopt/test/test_optimizebasic.py b/rpython/jit/metainterp/optimizeopt/test/test_optimizebasic.py --- a/rpython/jit/metainterp/optimizeopt/test/test_optimizebasic.py +++ b/rpython/jit/metainterp/optimizeopt/test/test_optimizebasic.py @@ -5995,5 +5995,22 @@ """ self.optimize_loop(ops, expected) + def test_remove_multiple_setarrayitems(self): + ops = """ + [p0, i1] + setarrayitem_gc(p0, 2, NULL, descr=gcarraydescr) + guard_value(i1, 42) [] + setarrayitem_gc(p0, 2, NULL, descr=gcarraydescr) # remove this + finish() + """ + expected = """ + [p0, i1] + setarrayitem_gc(p0, 2, NULL, descr=gcarraydescr) + guard_value(i1, 42) [] + finish() + """ + self.optimize_loop(ops, expected) + + class TestLLtype(BaseTestOptimizeBasic, LLtypeMixin): pass diff --git a/rpython/jit/metainterp/optimizeopt/test/test_optimizeopt.py b/rpython/jit/metainterp/optimizeopt/test/test_optimizeopt.py --- a/rpython/jit/metainterp/optimizeopt/test/test_optimizeopt.py +++ b/rpython/jit/metainterp/optimizeopt/test/test_optimizeopt.py @@ -8906,5 +8906,15 @@ """ self.optimize_loop(ops, expected) + def test_raw_buffer_ptr_info_intbounds_bug(self): + ops = """ + [] + i2 = call_i('malloc', 10, descr=raw_malloc_descr) + guard_value(i2, 12345) [] + jump() + """ + self.optimize_loop(ops, ops) + + class TestLLtype(OptimizeOptTest, LLtypeMixin): pass diff --git a/rpython/jit/metainterp/optimizeopt/virtualize.py b/rpython/jit/metainterp/optimizeopt/virtualize.py --- a/rpython/jit/metainterp/optimizeopt/virtualize.py +++ b/rpython/jit/metainterp/optimizeopt/virtualize.py @@ -256,13 +256,10 @@ offset = offsetbox.getint() # the following check is constant-folded to False if the # translation occurs without any VRawXxxValue instance around - if isinstance(opinfo, info.RawBufferPtrInfo): + if (isinstance(opinfo, info.RawBufferPtrInfo) or + isinstance(opinfo, info.RawSlicePtrInfo)): self.make_virtual_raw_slice(offset, opinfo, op) return - elif isinstance(opinfo, info.RawSlicePtrInfo): - offset = offset + opinfo.offset - self.make_virtual_raw_slice(offset, opinfo.parent, op) - return self.emit_operation(op) def optimize_ARRAYLEN_GC(self, op): diff --git a/rpython/jit/metainterp/test/test_ajit.py b/rpython/jit/metainterp/test/test_ajit.py --- a/rpython/jit/metainterp/test/test_ajit.py +++ b/rpython/jit/metainterp/test/test_ajit.py @@ -4342,3 +4342,15 @@ self.meta_interp(allfuncs, [9, 2000]) + + def test_unichar_might_be_signed(self): + py.test.skip("wchar_t is sometimes a signed 32-bit integer type, " + "but RPython inteprets it as unsigned (but still " + "translates to wchar_t, so can create confusion)") + def f(x): + return rffi.cast(lltype.Signed, rffi.cast(lltype.UniChar, x)) + res = self.interp_operations(f, [-1]) + if rffi.r_wchar_t.SIGN: + assert res == -1 + else: + assert res == 2 ** 16 - 1 or res == 2 ** 32 - 1 diff --git a/rpython/memory/gctransform/framework.py b/rpython/memory/gctransform/framework.py --- a/rpython/memory/gctransform/framework.py +++ b/rpython/memory/gctransform/framework.py @@ -902,39 +902,6 @@ op.args[0]], resultvar=op.result) - def gct_gc_shadowstackref_new(self, hop): - op = hop.spaceop - livevars = self.push_roots(hop) - hop.genop("direct_call", [self.root_walker.gc_shadowstackref_new_ptr], - resultvar=op.result) - self.pop_roots(hop, livevars) - - def gct_gc_shadowstackref_context(self, hop): - op = hop.spaceop - hop.genop("direct_call", - [self.root_walker.gc_shadowstackref_context_ptr, op.args[0]], - resultvar=op.result) - - def gct_gc_save_current_state_away(self, hop): - op = hop.spaceop - hop.genop("direct_call", - [self.root_walker.gc_save_current_state_away_ptr, - op.args[0], op.args[1]]) - - def gct_gc_forget_current_state(self, hop): - hop.genop("direct_call", - [self.root_walker.gc_forget_current_state_ptr]) - - def gct_gc_restore_state_from(self, hop): - op = hop.spaceop - hop.genop("direct_call", - [self.root_walker.gc_restore_state_from_ptr, - op.args[0]]) - - def gct_gc_start_fresh_new_state(self, hop): - hop.genop("direct_call", - [self.root_walker.gc_start_fresh_new_state_ptr]) - def gct_do_malloc_fixedsize(self, hop): # used by the JIT (see rpython.jit.backend.llsupport.gc) op = hop.spaceop diff --git a/rpython/memory/gctransform/shadowstack.py b/rpython/memory/gctransform/shadowstack.py --- a/rpython/memory/gctransform/shadowstack.py +++ b/rpython/memory/gctransform/shadowstack.py @@ -180,7 +180,7 @@ thread_stacks[gcdata.active_tid] = old_ref # # no GC operation from here -- switching shadowstack! - shadow_stack_pool.save_current_state_away(old_ref, llmemory.NULL) + shadow_stack_pool.save_current_state_away(old_ref) if new_ref: shadow_stack_pool.restore_state_from(new_ref) else: @@ -219,68 +219,17 @@ minimal_transform=False) def need_stacklet_support(self, gctransformer, getfn): - shadow_stack_pool = self.shadow_stack_pool - SHADOWSTACKREF = get_shadowstackref(self, gctransformer) - - def gc_shadowstackref_new(): - ssref = shadow_stack_pool.allocate(SHADOWSTACKREF) - return lltype.cast_opaque_ptr(llmemory.GCREF, ssref) - - def gc_shadowstackref_context(gcref): - ssref = lltype.cast_opaque_ptr(lltype.Ptr(SHADOWSTACKREF), gcref) - return ssref.context - - def gc_save_current_state_away(gcref, ncontext): - ssref = lltype.cast_opaque_ptr(lltype.Ptr(SHADOWSTACKREF), gcref) - shadow_stack_pool.save_current_state_away(ssref, ncontext) - - def gc_forget_current_state(): - shadow_stack_pool.forget_current_state() - - def gc_restore_state_from(gcref): - ssref = lltype.cast_opaque_ptr(lltype.Ptr(SHADOWSTACKREF), gcref) - shadow_stack_pool.restore_state_from(ssref) - - def gc_start_fresh_new_state(): - shadow_stack_pool.start_fresh_new_state() - - s_gcref = SomePtr(llmemory.GCREF) - s_addr = SomeAddress() - self.gc_shadowstackref_new_ptr = getfn(gc_shadowstackref_new, - [], s_gcref, - minimal_transform=False) - self.gc_shadowstackref_context_ptr = getfn(gc_shadowstackref_context, - [s_gcref], s_addr, - inline=True) - self.gc_save_current_state_away_ptr = getfn(gc_save_current_state_away, - [s_gcref, s_addr], - annmodel.s_None, - inline=True) - self.gc_forget_current_state_ptr = getfn(gc_forget_current_state, - [], annmodel.s_None, - inline=True) - self.gc_restore_state_from_ptr = getfn(gc_restore_state_from, - [s_gcref], annmodel.s_None, - inline=True) - self.gc_start_fresh_new_state_ptr = getfn(gc_start_fresh_new_state, - [], annmodel.s_None, - inline=True) + from rpython.rlib import _stacklet_shadowstack + _stacklet_shadowstack.complete_destrptr(gctransformer) # ____________________________________________________________ class ShadowStackPool(object): - """Manages a pool of shadowstacks. The MAX most recently used - shadowstacks are fully allocated and can be directly jumped into - (called "full stacks" below). - The rest are stored in a more virtual-memory-friendly way, i.e. - with just the right amount malloced. Before they can run, they - must be copied into a full shadowstack. + """Manages a pool of shadowstacks. """ _alloc_flavor_ = "raw" root_stack_depth = 163840 - MAX = 20 - def __init__(self, gcdata): self.unused_full_stack = llmemory.NULL self.gcdata = gcdata @@ -293,28 +242,18 @@ """Allocate an empty SHADOWSTACKREF object.""" return lltype.malloc(SHADOWSTACKREF, zero=True) - def save_current_state_away(self, shadowstackref, ncontext): + def save_current_state_away(self, shadowstackref): """Save the current state away into 'shadowstackref'. This either works, or raise MemoryError and nothing is done. To do a switch, first call save_current_state_away() or forget_current_state(), and then call restore_state_from() or start_fresh_new_state(). """ - fresh_free_fullstack = shadowstackref.prepare_free_slot() - if self.unused_full_stack: - if fresh_free_fullstack: - llmemory.raw_free(fresh_free_fullstack) - elif fresh_free_fullstack: - self.unused_full_stack = fresh_free_fullstack - else: - self._prepare_unused_stack() - # + self._prepare_unused_stack() shadowstackref.base = self.gcdata.root_stack_base shadowstackref.top = self.gcdata.root_stack_top - shadowstackref.context = ncontext ll_assert(shadowstackref.base <= shadowstackref.top, "save_current_state_away: broken shadowstack") - shadowstackref.attach() # # cannot use llop.gc_writebarrier() here, because # we are in a minimally-transformed GC helper :-/ @@ -337,7 +276,6 @@ ll_assert(bool(shadowstackref.base), "empty shadowstackref!") ll_assert(shadowstackref.base <= shadowstackref.top, "restore_state_from: broken shadowstack") - self.unused_full_stack = shadowstackref.rebuild(self.unused_full_stack) self.gcdata.root_stack_base = shadowstackref.base self.gcdata.root_stack_top = shadowstackref.top self._cleanup(shadowstackref) @@ -350,127 +288,28 @@ def _cleanup(self, shadowstackref): shadowstackref.base = llmemory.NULL shadowstackref.top = llmemory.NULL - shadowstackref.context = llmemory.NULL def _prepare_unused_stack(self): - ll_assert(self.unused_full_stack == llmemory.NULL, - "already an unused_full_stack") - root_stack_size = sizeofaddr * self.root_stack_depth - self.unused_full_stack = llmemory.raw_malloc(root_stack_size) if self.unused_full_stack == llmemory.NULL: - raise MemoryError + root_stack_size = sizeofaddr * self.root_stack_depth + self.unused_full_stack = llmemory.raw_malloc(root_stack_size) + if self.unused_full_stack == llmemory.NULL: + raise MemoryError def get_shadowstackref(root_walker, gctransformer): if hasattr(gctransformer, '_SHADOWSTACKREF'): return gctransformer._SHADOWSTACKREF - # Helpers to same virtual address space by limiting to MAX the - # number of full shadow stacks. If there are more, we compact - # them into a separately-allocated zone of memory of just the right - # size. See the comments in the definition of fullstack_cache below. - - def ll_prepare_free_slot(_unused): - """Free up a slot in the array of MAX entries, ready for storing - a new shadowstackref. Return the memory of the now-unused full - shadowstack. - """ - index = fullstack_cache[0] - if index > 0: - return llmemory.NULL # there is already at least one free slot - # - # make a compact copy in one old entry and return the - # original full-sized memory - index = -index - ll_assert(index > 0, "prepare_free_slot: cache[0] == 0") - compacting = lltype.cast_int_to_ptr(SHADOWSTACKREFPTR, - fullstack_cache[index]) - index += 1 - if index >= ShadowStackPool.MAX: - index = 1 - fullstack_cache[0] = -index # update to the next value in order - # - compacting.detach() - original = compacting.base - size = compacting.top - original - new = llmemory.raw_malloc(size) - if new == llmemory.NULL: - return llmemory.NULL - llmemory.raw_memcopy(original, new, size) - compacting.base = new - compacting.top = new + size - return original - - def ll_attach(shadowstackref): - """After prepare_free_slot(), store a shadowstackref in that slot.""" - index = fullstack_cache[0] - ll_assert(index > 0, "fullstack attach: no free slot") - fullstack_cache[0] = fullstack_cache[index] - fullstack_cache[index] = lltype.cast_ptr_to_int(shadowstackref) - ll_assert(shadowstackref.fsindex == 0, "fullstack attach: already one?") - shadowstackref.fsindex = index # > 0 - - def ll_detach(shadowstackref): - """Detach a shadowstackref from the array of MAX entries.""" - index = shadowstackref.fsindex - ll_assert(index > 0, "detach: unattached shadowstackref") - ll_assert(fullstack_cache[index] == - lltype.cast_ptr_to_int(shadowstackref), - "detach: bad fullstack_cache") - shadowstackref.fsindex = 0 - fullstack_cache[index] = fullstack_cache[0] - fullstack_cache[0] = index - - def ll_rebuild(shadowstackref, fullstack_base): - if shadowstackref.fsindex > 0: - shadowstackref.detach() - return fullstack_base - else: - # make an expanded copy of the compact shadowstack stored in - # 'shadowstackref' and free that - compact = shadowstackref.base - size = shadowstackref.top - compact - shadowstackref.base = fullstack_base - shadowstackref.top = fullstack_base + size - llmemory.raw_memcopy(compact, fullstack_base, size) - llmemory.raw_free(compact) - return llmemory.NULL - SHADOWSTACKREFPTR = lltype.Ptr(lltype.GcForwardReference()) SHADOWSTACKREF = lltype.GcStruct('ShadowStackRef', - ('base', llmemory.Address), - ('top', llmemory.Address), - ('context', llmemory.Address), - ('fsindex', lltype.Signed), - rtti=True, - adtmeths={'prepare_free_slot': ll_prepare_free_slot, - 'attach': ll_attach, - 'detach': ll_detach, - 'rebuild': ll_rebuild}) + ('base', llmemory.Address), + ('top', llmemory.Address), + rtti=True) SHADOWSTACKREFPTR.TO.become(SHADOWSTACKREF) - # Items 1..MAX-1 of the following array can be SHADOWSTACKREF - # addresses cast to integer. Or, they are small numbers and they - # make up a free list, rooted in item 0, which goes on until - # terminated with a negative item. This negative item gives (the - # opposite of) the index of the entry we try to remove next. - # Initially all items are in this free list and the end is '-1'. - fullstack_cache = lltype.malloc(lltype.Array(lltype.Signed), - ShadowStackPool.MAX, - flavor='raw', immortal=True) - for i in range(len(fullstack_cache) - 1): - fullstack_cache[i] = i + 1 - fullstack_cache[len(fullstack_cache) - 1] = -1 - def customtrace(gc, obj, callback, arg): obj = llmemory.cast_adr_to_ptr(obj, SHADOWSTACKREFPTR) - index = obj.fsindex - if index > 0: - # Haaaaaaack: fullstack_cache[] is just an integer, so it - # doesn't follow the SHADOWSTACKREF when it moves. But we - # know this customtrace() will be called just after the - # move. So we fix the fullstack_cache[] now... :-/ - fullstack_cache[index] = lltype.cast_ptr_to_int(obj) addr = obj.top start = obj.base while addr != start: @@ -484,22 +323,10 @@ (SHADOWSTACKREF, customtrace)) def shadowstack_destructor(shadowstackref): - if root_walker.stacklet_support: - from rpython.rlib import _rffi_stacklet as _c - h = shadowstackref.context - h = llmemory.cast_adr_to_ptr(h, _c.handle) - shadowstackref.context = llmemory.NULL - # - if shadowstackref.fsindex > 0: - shadowstackref.detach() base = shadowstackref.base shadowstackref.base = llmemory.NULL shadowstackref.top = llmemory.NULL llmemory.raw_free(base) - # - if root_walker.stacklet_support: - if h: - _c.destroy(h) destrptr = gctransformer.annotate_helper(shadowstack_destructor, [SHADOWSTACKREFPTR], lltype.Void) diff --git a/rpython/rlib/_stacklet_shadowstack.py b/rpython/rlib/_stacklet_shadowstack.py --- a/rpython/rlib/_stacklet_shadowstack.py +++ b/rpython/rlib/_stacklet_shadowstack.py @@ -1,104 +1,176 @@ from rpython.rlib import _rffi_stacklet as _c from rpython.rlib.debug import ll_assert -from rpython.rtyper.annlowlevel import llhelper -from rpython.rtyper.lltypesystem import lltype, llmemory +from rpython.rlib import rgc +from rpython.rtyper.annlowlevel import llhelper, MixLevelHelperAnnotator +from rpython.rtyper.lltypesystem import lltype, llmemory, rffi from rpython.rtyper.lltypesystem.lloperation import llop +from rpython.annotator import model as annmodel +from rpython.rtyper.llannotation import lltype_to_annotation -NULL_SUSPSTACK = lltype.nullptr(llmemory.GCREF.TO) +# +# A GC wrapper around the C stacklet handles, with additionally a +# copy of the shadowstack (for all stacklets different than the main) +# +STACKLET = lltype.GcStruct('Stacklet', + ('s_handle', _c.handle), + ('s_sscopy', llmemory.Address), + rtti=True) +STACKLET_PTR = lltype.Ptr(STACKLET) +NULL_STACKLET = lltype.nullptr(STACKLET) +def complete_destrptr(gctransformer): + translator = gctransformer.translator + mixlevelannotator = MixLevelHelperAnnotator(translator.rtyper) + args_s = [lltype_to_annotation(STACKLET_PTR)] + s_result = annmodel.s_None + destrptr = mixlevelannotator.delayedfunction(stacklet_destructor, + args_s, s_result) + mixlevelannotator.finish() + lltype.attachRuntimeTypeInfo(STACKLET, destrptr=destrptr) + +def stacklet_destructor(stacklet): + sscopy = stacklet.s_sscopy + if sscopy: + llmemory.raw_free(sscopy) + h = stacklet.s_handle + if h: + _c.destroy(h) + + +SIZEADDR = llmemory.sizeof(llmemory.Address) + +def customtrace(gc, obj, callback, arg): + stacklet = llmemory.cast_adr_to_ptr(obj, STACKLET_PTR) + sscopy = stacklet.s_sscopy + if sscopy: + length_bytes = sscopy.signed[0] + while length_bytes > 0: + addr = sscopy + length_bytes + gc._trace_callback(callback, arg, addr) + length_bytes -= SIZEADDR +lambda_customtrace = lambda: customtrace + +def sscopy_detach_shadow_stack(): + base = llop.gc_adr_of_root_stack_base(llmemory.Address).address[0] + top = llop.gc_adr_of_root_stack_top(llmemory.Address).address[0] + length_bytes = top - base + result = llmemory.raw_malloc(SIZEADDR + length_bytes) + if result: + result.signed[0] = length_bytes + llmemory.raw_memcopy(base, result + SIZEADDR, length_bytes) + llop.gc_adr_of_root_stack_top(llmemory.Address).address[0] = base + return result + +def sscopy_attach_shadow_stack(sscopy): + base = llop.gc_adr_of_root_stack_base(llmemory.Address).address[0] + ll_assert(llop.gc_adr_of_root_stack_top(llmemory.Address).address[0]==base, + "attach_shadow_stack: ss is not empty?") + length_bytes = sscopy.signed[0] + llmemory.raw_memcopy(sscopy + SIZEADDR, base, length_bytes) + llop.gc_adr_of_root_stack_top(llmemory.Address).address[0] = ( + base + length_bytes) + llmemory.raw_free(sscopy) + +def alloc_stacklet(): + new_stacklet = lltype.malloc(STACKLET) + new_stacklet.s_handle = _c.null_handle + return new_stacklet + +def attach_handle_on_stacklet(stacklet, h): + if not h: + raise MemoryError + elif _c.is_empty_handle(h): + ll_assert(gcrootfinder.sscopy == llmemory.NULL, + "empty_handle but sscopy != NULL") + return NULL_STACKLET + else: + # This is a return that gave us a real handle. Store it. + stacklet.s_handle = h + stacklet.s_sscopy = gcrootfinder.sscopy + ll_assert(gcrootfinder.sscopy != llmemory.NULL, + "!empty_handle but sscopy == NULL") + gcrootfinder.sscopy = llmemory.NULL + llop.gc_writebarrier(lltype.Void, llmemory.cast_ptr_to_adr(stacklet)) + return stacklet + +def consume_stacklet(stacklet): + h = stacklet.s_handle + ll_assert(bool(h), "consume_stacklet: null handle") + stacklet.s_handle = _c.null_handle + stacklet.s_sscopy = llmemory.NULL + return h + def _new_callback(h, arg): - # We still have the old shadowstack active at this point; save it - # away, and start a fresh new one - oldsuspstack = gcrootfinder.oldsuspstack - h = llmemory.cast_ptr_to_adr(h) - llop.gc_save_current_state_away(lltype.Void, - oldsuspstack, h) - llop.gc_start_fresh_new_state(lltype.Void) - gcrootfinder.oldsuspstack = NULL_SUSPSTACK + # There is a fresh stacklet object waiting on the gcrootfinder, + # so populate it with data that represents the parent suspended + # stacklet and detach the stacklet object from gcrootfinder. + stacklet = gcrootfinder.fresh_stacklet + gcrootfinder.fresh_stacklet = NULL_STACKLET + ll_assert(stacklet != NULL_STACKLET, "_new_callback: NULL #1") + stacklet = attach_handle_on_stacklet(stacklet, h) + ll_assert(stacklet != NULL_STACKLET, "_new_callback: NULL #2") # - newsuspstack = gcrootfinder.callback(oldsuspstack, arg) + # Call the main function provided by the (RPython) user. + stacklet = gcrootfinder.runfn(stacklet, arg) # - # Finishing this stacklet. - gcrootfinder.oldsuspstack = NULL_SUSPSTACK - gcrootfinder.newsuspstack = newsuspstack - h = llop.gc_shadowstackref_context(llmemory.Address, newsuspstack) - return llmemory.cast_adr_to_ptr(h, _c.handle) + # Here, 'stacklet' points to the target stacklet to which we want + # to jump to next. Read the 'handle' and forget about the + # stacklet object. + gcrootfinder.sscopy = llmemory.NULL + return consume_stacklet(stacklet) -def prepare_old_suspstack(): - if not gcrootfinder.oldsuspstack: # else reuse the one still there - _allocate_old_suspstack() +def _new(thread_handle, arg): + # No shadowstack manipulation here (no usage of gc references) + sscopy = sscopy_detach_shadow_stack() + gcrootfinder.sscopy = sscopy + if not sscopy: + return _c.null_handle + h = _c.new(thread_handle, llhelper(_c.run_fn, _new_callback), arg) + sscopy_attach_shadow_stack(sscopy) + return h +_new._dont_inline_ = True -def _allocate_old_suspstack(): - suspstack = llop.gc_shadowstackref_new(llmemory.GCREF) - gcrootfinder.oldsuspstack = suspstack -_allocate_old_suspstack._dont_inline_ = True - -def get_result_suspstack(h): - # Now we are in the target, after the switch() or the new(). - # Note that this whole module was carefully written in such a way as - # not to invoke pushing/popping things off the shadowstack at - # unexpected moments... - oldsuspstack = gcrootfinder.oldsuspstack - newsuspstack = gcrootfinder.newsuspstack - gcrootfinder.oldsuspstack = NULL_SUSPSTACK - gcrootfinder.newsuspstack = NULL_SUSPSTACK - if not h: - raise MemoryError - # We still have the old shadowstack active at this point; save it - # away, and restore the new one - if oldsuspstack: - ll_assert(not _c.is_empty_handle(h),"unexpected empty stacklet handle") - h = llmemory.cast_ptr_to_adr(h) - llop.gc_save_current_state_away(lltype.Void, oldsuspstack, h) - else: - ll_assert(_c.is_empty_handle(h),"unexpected non-empty stacklet handle") - llop.gc_forget_current_state(lltype.Void) - # - llop.gc_restore_state_from(lltype.Void, newsuspstack) - # - # From this point on, 'newsuspstack' is consumed and done, its - # shadow stack installed as the current one. It should not be - # used any more. For performance, we avoid it being deallocated - # by letting it be reused on the next switch. - gcrootfinder.oldsuspstack = newsuspstack - # Return. - return oldsuspstack +def _switch(h): + # No shadowstack manipulation here (no usage of gc references) + sscopy = sscopy_detach_shadow_stack() + gcrootfinder.sscopy = sscopy + if not sscopy: + return _c.null_handle + h = _c.switch(h) + sscopy_attach_shadow_stack(sscopy) + return h +_switch._dont_inline_ = True class StackletGcRootFinder(object): - def new(thrd, callback, arg): - gcrootfinder.callback = callback - thread_handle = thrd._thrd - prepare_old_suspstack() - h = _c.new(thread_handle, llhelper(_c.run_fn, _new_callback), arg) - return get_result_suspstack(h) - new._dont_inline_ = True - new = staticmethod(new) - - def switch(suspstack): - # suspstack has a handle to target, i.e. where to switch to - ll_assert(suspstack != gcrootfinder.oldsuspstack, - "stacklet: invalid use") - gcrootfinder.newsuspstack = suspstack - h = llop.gc_shadowstackref_context(llmemory.Address, suspstack) - h = llmemory.cast_adr_to_ptr(h, _c.handle) - prepare_old_suspstack() - h = _c.switch(h) - return get_result_suspstack(h) - switch._dont_inline_ = True - switch = staticmethod(switch) + fresh_stacklet = NULL_STACKLET @staticmethod - def is_empty_handle(suspstack): - return not suspstack + def new(thrd, callback, arg): + rgc.register_custom_trace_hook(STACKLET, lambda_customtrace) + result_stacklet = alloc_stacklet() + gcrootfinder.fresh_stacklet = alloc_stacklet() + gcrootfinder.runfn = callback + thread_handle = thrd._thrd + h = _new(thread_handle, arg) + return attach_handle_on_stacklet(result_stacklet, h) + + @staticmethod + def switch(stacklet): + # 'stacklet' has a handle to target, i.e. where to switch to + h = consume_stacklet(stacklet) + h = _switch(h) + return attach_handle_on_stacklet(stacklet, h) + + @staticmethod + def is_empty_handle(stacklet): + return not stacklet @staticmethod def get_null_handle(): - return NULL_SUSPSTACK + return NULL_STACKLET gcrootfinder = StackletGcRootFinder() -gcrootfinder.oldsuspstack = NULL_SUSPSTACK -gcrootfinder.newsuspstack = NULL_SUSPSTACK diff --git a/rpython/rtyper/llinterp.py b/rpython/rtyper/llinterp.py --- a/rpython/rtyper/llinterp.py +++ b/rpython/rtyper/llinterp.py @@ -890,19 +890,6 @@ def op_gc_reattach_callback_pieces(self): raise NotImplementedError("gc_reattach_callback_pieces") - def op_gc_shadowstackref_new(self): # stacklet+shadowstack - raise NotImplementedError("gc_shadowstackref_new") - def op_gc_shadowstackref_context(self): - raise NotImplementedError("gc_shadowstackref_context") - def op_gc_save_current_state_away(self): - raise NotImplementedError("gc_save_current_state_away") - def op_gc_forget_current_state(self): - raise NotImplementedError("gc_forget_current_state") - def op_gc_restore_state_from(self): - raise NotImplementedError("gc_restore_state_from") - def op_gc_start_fresh_new_state(self): - raise NotImplementedError("gc_start_fresh_new_state") - def op_gc_get_type_info_group(self): raise NotImplementedError("gc_get_type_info_group") diff --git a/rpython/rtyper/lltypesystem/lloperation.py b/rpython/rtyper/lltypesystem/lloperation.py --- a/rpython/rtyper/lltypesystem/lloperation.py +++ b/rpython/rtyper/lltypesystem/lloperation.py @@ -521,14 +521,6 @@ 'gc_detach_callback_pieces': LLOp(), 'gc_reattach_callback_pieces': LLOp(), - # for stacklet+shadowstack support - 'gc_shadowstackref_new': LLOp(canmallocgc=True), - 'gc_shadowstackref_context': LLOp(), - 'gc_save_current_state_away': LLOp(), - 'gc_forget_current_state': LLOp(), - 'gc_restore_state_from': LLOp(), - 'gc_start_fresh_new_state': LLOp(), - # NOTE NOTE NOTE! don't forget *** canmallocgc=True *** for anything that # can malloc a GC object. _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit