Author: Armin Rigo <[email protected]>
Branch: cffi-static-callback-embedding
Changeset: r81546:cd096cdf82e1
Date: 2016-01-04 16:52 +0100
http://bitbucket.org/pypy/pypy/changeset/cd096cdf82e1/

Log:    hg merge ec-keepalive

diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst
--- a/pypy/doc/whatsnew-head.rst
+++ b/pypy/doc/whatsnew-head.rst
@@ -5,6 +5,8 @@
 .. this is a revision shortly after release-4.0.1
 .. startrev: 4b5c840d0da2
 
+Fixed ``_PyLong_FromByteArray()``, which was buggy.
+
 .. branch: numpy-1.10
 
 Fix tests to run cleanly with -A and start to fix micronumpy for upstream numpy
diff --git a/pypy/module/cpyext/longobject.py b/pypy/module/cpyext/longobject.py
--- a/pypy/module/cpyext/longobject.py
+++ b/pypy/module/cpyext/longobject.py
@@ -228,26 +228,11 @@
 def _PyLong_FromByteArray(space, bytes, n, little_endian, signed):
     little_endian = rffi.cast(lltype.Signed, little_endian)
     signed = rffi.cast(lltype.Signed, signed)
-
-    result = rbigint()
-    negative = False
-
-    for i in range(0, n):
-        if little_endian:
-            c = intmask(bytes[i])
-        else:
-            c = intmask(bytes[n - i - 1])
-        if i == 0 and signed and c & 0x80:
-            negative = True
-        if negative:
-            c = c ^ 0xFF
-        digit = rbigint.fromint(c)
-
-        result = result.lshift(8)
-        result = result.add(digit)
-
-    if negative:
-        result = result.neg()
-
+    s = rffi.charpsize2str(rffi.cast(rffi.CCHARP, bytes),
+                           rffi.cast(lltype.Signed, n))
+    if little_endian:
+        byteorder = 'little'
+    else:
+        byteorder = 'big'
+    result = rbigint.frombytes(s, byteorder, signed != 0)
     return space.newlong_from_rbigint(result)
-
diff --git a/pypy/module/cpyext/slotdefs.py b/pypy/module/cpyext/slotdefs.py
--- a/pypy/module/cpyext/slotdefs.py
+++ b/pypy/module/cpyext/slotdefs.py
@@ -4,8 +4,7 @@
 
 from rpython.rtyper.lltypesystem import rffi, lltype
 from pypy.module.cpyext.api import (
-    cpython_api, generic_cpy_call, PyObject, Py_ssize_t, Py_TPFLAGS_CHECKTYPES,
-    CANNOT_FAIL)
+    cpython_api, generic_cpy_call, PyObject, Py_ssize_t, Py_TPFLAGS_CHECKTYPES)
 from pypy.module.cpyext.typeobjectdefs import (
     unaryfunc, wrapperfunc, ternaryfunc, PyTypeObjectPtr, binaryfunc,
     getattrfunc, getattrofunc, setattrofunc, lenfunc, ssizeargfunc, inquiry,
@@ -387,7 +386,7 @@
             return
 
         @cpython_api([PyObject, PyObject], PyObject,
-                     error=CANNOT_FAIL, external=True)
+                     external=True)
         @func_renamer("cpyext_tp_getattro_%s" % (typedef.name,))
         def slot_tp_getattro(space, w_self, w_name):
             return space.call_function(getattr_fn, w_self, w_name)
diff --git a/pypy/module/cpyext/test/test_longobject.py b/pypy/module/cpyext/test/test_longobject.py
--- a/pypy/module/cpyext/test/test_longobject.py
+++ b/pypy/module/cpyext/test/test_longobject.py
@@ -175,10 +175,26 @@
                                               little_endian, is_signed);
              """),
             ])
-        assert module.from_bytearray(True, False) == 0x9ABC
-        assert module.from_bytearray(True, True) == -0x6543
-        assert module.from_bytearray(False, False) == 0xBC9A
-        assert module.from_bytearray(False, True) == -0x4365
+        assert module.from_bytearray(True, False) == 0xBC9A
+        assert module.from_bytearray(True, True) == -0x4366
+        assert module.from_bytearray(False, False) == 0x9ABC
+        assert module.from_bytearray(False, True) == -0x6544
+
+    def test_frombytearray_2(self):
+        module = self.import_extension('foo', [
+            ("from_bytearray", "METH_VARARGS",
+             """
+                 int little_endian, is_signed;
+                 if (!PyArg_ParseTuple(args, "ii", &little_endian, &is_signed))
+                     return NULL;
+                 return _PyLong_FromByteArray("\x9A\xBC\x41", 3,
+                                              little_endian, is_signed);
+             """),
+            ])
+        assert module.from_bytearray(True, False) == 0x41BC9A
+        assert module.from_bytearray(True, True) == 0x41BC9A
+        assert module.from_bytearray(False, False) == 0x9ABC41
+        assert module.from_bytearray(False, True) == -0x6543BF
 
     def test_fromunicode(self):
         module = self.import_extension('foo', [
diff --git a/pypy/module/cpyext/test/test_typeobject.py b/pypy/module/cpyext/test/test_typeobject.py
--- a/pypy/module/cpyext/test/test_typeobject.py
+++ b/pypy/module/cpyext/test/test_typeobject.py
@@ -414,15 +414,26 @@
                      return NULL;
                  }
                  PyObject *name = PyString_FromString("attr1");
-                 PyIntObject *attr1 = obj->ob_type->tp_getattro(obj, name);
-                 if (attr1->ob_ival != value->ob_ival)
+                 PyIntObject *attr = obj->ob_type->tp_getattro(obj, name);
+                 if (attr->ob_ival != value->ob_ival)
                  {
                      PyErr_SetString(PyExc_ValueError,
                                      "tp_getattro returned wrong value");
                      return NULL;
                  }
                  Py_DECREF(name);
-                 Py_DECREF(attr1);
+                 Py_DECREF(attr);
+                 name = PyString_FromString("attr2");
+                 attr = obj->ob_type->tp_getattro(obj, name);
+                 if (attr == NULL && PyErr_ExceptionMatches(PyExc_AttributeError))
+                 {
+                     PyErr_Clear();
+                 } else {
+                     PyErr_SetString(PyExc_ValueError,
+                                     "tp_getattro should have raised");
+                     return NULL;
+                 }
+                 Py_DECREF(name);
                  Py_RETURN_TRUE;
              '''
              )
@@ -637,7 +648,7 @@
                 IntLikeObject *intObj;
                 long intval;
 
-                if (!PyArg_ParseTuple(args, "i", &intval))
+                if (!PyArg_ParseTuple(args, "l", &intval))
                     return NULL;
 
                 IntLike_Type.tp_as_number = &intlike_as_number;
@@ -657,7 +668,7 @@
                 IntLikeObjectNoOp *intObjNoOp;
                 long intval;
 
-                if (!PyArg_ParseTuple(args, "i", &intval))
+                if (!PyArg_ParseTuple(args, "l", &intval))
                     return NULL;
 
                 IntLike_Type_NoOp.tp_flags |= Py_TPFLAGS_CHECKTYPES;
diff --git a/pypy/module/pypyjit/test_pypy_c/test_struct.py b/pypy/module/pypyjit/test_pypy_c/test_struct.py
--- a/pypy/module/pypyjit/test_pypy_c/test_struct.py
+++ b/pypy/module/pypyjit/test_pypy_c/test_struct.py
@@ -45,7 +45,7 @@
 
         # the newstr and the strsetitems are because the string is forced,
         # which is in turn because the optimizer doesn't know how to handle a
-        # getarrayitem_gc_i on a virtual string. It could be improved, but it
+        # gc_load_indexed_i on a virtual string. It could be improved, but it
         # is also true that in real life cases struct.unpack is called on
         # strings which come from the outside, so it's a minor issue.
         assert loop.match_by_id("unpack", """
@@ -55,17 +55,17 @@
             strsetitem(p88, 1, i14)
             strsetitem(p88, 2, i17)
             strsetitem(p88, 3, i20)
-            i91 = getarrayitem_gc_i(p88, 0, descr=<ArrayS 4>)
+            i91 = gc_load_indexed_i(p88, 0, 1, _, -4)
         """)
 
     def test_struct_object(self):
         def main(n):
             import struct
-            s = struct.Struct("i")
+            s = struct.Struct("ii")
             i = 1
             while i < n:
-                buf = s.pack(i)       # ID: pack
-                x = s.unpack(buf)[0]  # ID: unpack
+                buf = s.pack(-1, i)     # ID: pack
+                x = s.unpack(buf)[1]    # ID: unpack
                 i += x / i
             return i
 
@@ -88,10 +88,15 @@
 
         assert loop.match_by_id('unpack', """
             # struct.unpack
-            p88 = newstr(4)
-            strsetitem(p88, 0, i11)
-            strsetitem(p88, 1, i14)
-            strsetitem(p88, 2, i17)
-            strsetitem(p88, 3, i20)
-            i91 = getarrayitem_gc_i(p88, 0, descr=<ArrayS 4>)
+            p88 = newstr(8)
+            strsetitem(p88, 0, 255)
+            strsetitem(p88, 1, 255)
+            strsetitem(p88, 2, 255)
+            strsetitem(p88, 3, 255)
+            strsetitem(p88, 4, i11)
+            strsetitem(p88, 5, i14)
+            strsetitem(p88, 6, i17)
+            strsetitem(p88, 7, i20)
+            i90 = gc_load_indexed_i(p88, 0, 1, _, -4)
+            i91 = gc_load_indexed_i(p88, 4, 1, _, -4)
         """)
diff --git a/pypy/module/thread/__init__.py b/pypy/module/thread/__init__.py
--- a/pypy/module/thread/__init__.py
+++ b/pypy/module/thread/__init__.py
@@ -27,7 +27,7 @@
         from pypy.module.thread import gil
         MixedModule.__init__(self, space, *args)
         prev_ec = space.threadlocals.get_ec()
-        space.threadlocals = gil.GILThreadLocals()
+        space.threadlocals = gil.GILThreadLocals(space)
         space.threadlocals.initialize(space)
         if prev_ec is not None:
             space.threadlocals._set_ec(prev_ec)
diff --git a/pypy/module/thread/test/test_gil.py b/pypy/module/thread/test/test_gil.py
--- a/pypy/module/thread/test/test_gil.py
+++ b/pypy/module/thread/test/test_gil.py
@@ -65,7 +65,7 @@
             except Exception, e:
                 assert 0
             thread.gc_thread_die()
-        my_gil_threadlocals = gil.GILThreadLocals()
+        my_gil_threadlocals = gil.GILThreadLocals(space)
         def f():
             state.data = []
             state.datalen1 = 0
diff --git a/pypy/module/thread/threadlocals.py b/pypy/module/thread/threadlocals.py
--- a/pypy/module/thread/threadlocals.py
+++ b/pypy/module/thread/threadlocals.py
@@ -1,5 +1,6 @@
-from rpython.rlib import rthread
+from rpython.rlib import rthread, rweaklist
 from rpython.rlib.objectmodel import we_are_translated
+from rpython.rlib.rarithmetic import r_ulonglong
 from pypy.module.thread.error import wrap_thread_error
 from pypy.interpreter.executioncontext import ExecutionContext
 
@@ -13,15 +14,53 @@
     a thread finishes.  This works as long as the thread was started by
     os_thread.bootstrap()."""
 
-    def __init__(self):
+    _next_generation = r_ulonglong(0)
+
+    def __init__(self, space):
         "NOT_RPYTHON"
-        self._valuedict = {}   # {thread_ident: ExecutionContext()}
+        #
+        # This object tracks code that enters and leaves threads.
+        # There are two APIs.  For Python-level threads, we know when
+        # the thread starts and ends, and we call enter_thread() and
+        # leave_thread().  In a few other cases, like callbacks, we
+        # might be running in some never-seen-before thread: in this
+        # case, the callback logic needs to call try_enter_thread() at
+        # the start, and if this returns True it needs to call
+        # leave_thread() at the end.
+        #
+        # We implement an optimization for the second case (which only
+        # works if we translate with a framework GC and with
+        # rweakref).  If try_enter_thread() is called in a
+        # never-seen-before thread, it still returns False and
+        # remembers the ExecutionContext with 'self._weaklist'.  The
+        # next time we call try_enter_thread() again in the same
+        # thread, the ExecutionContext is reused.  The optimization is
+        # not completely invisible to the user: 'thread._local()'
+        # values will remain.  We can argue that it is the correct
+        # behavior to do that, and the behavior we get if the
+        # optimization is disabled is buggy (but hard to do better
+        # then).
+        #
+        # 'self._valuedict' is a dict mapping the thread idents to
+        # ExecutionContexts; it does not list the ExecutionContexts
+        # which are in 'self._weaklist'.  (The latter is more precisely
+        # a list of AutoFreeECWrapper objects, defined below, which
+        # each references the ExecutionContext.)
+        #
+        self.space = space
+        self._valuedict = {}
         self._cleanup_()
         self.raw_thread_local = rthread.ThreadLocalReference(ExecutionContext,
                                                             loop_invariant=True)
 
+    def can_optimize_with_weaklist(self):
+        config = self.space.config
+        return (config.translation.rweakref and
+                rthread.ThreadLocalReference.automatic_keepalive(config))
+
     def _cleanup_(self):
         self._valuedict.clear()
+        self._weaklist = None
         self._mainthreadident = 0
 
     def enter_thread(self, space):
@@ -29,19 +68,36 @@
         self._set_ec(space.createexecutioncontext())
 
     def try_enter_thread(self, space):
-        if rthread.get_ident() in self._valuedict:
+        # common case: the thread-local has already got a value
+        if self.raw_thread_local.get() is not None:
             return False
-        self.enter_thread(space)
-        return True
 
-    def _set_ec(self, ec):
+        # Else, make and attach a new ExecutionContext
+        ec = space.createexecutioncontext()
+        if not self.can_optimize_with_weaklist():
+            self._set_ec(ec)
+            return True
+
+        # If can_optimize_with_weaklist(), then 'rthread' keeps the
+        # thread-local values alive until the end of the thread.  Use
+        # AutoFreeECWrapper as an object with a __del__; when this
+        # __del__ is called, it means the thread was really finished.
+        # In this case we don't want leave_thread() to be called
+        # explicitly, so we return False.
+        if self._weaklist is None:
+            self._weaklist = ListECWrappers()
+            self._weaklist.initialize()
+        self._weaklist.add_handle(AutoFreeECWrapper(self, ec))
+        self._set_ec(ec, register_in_valuedict=False)
+        return False
+
+    def _set_ec(self, ec, register_in_valuedict=True):
         ident = rthread.get_ident()
         if self._mainthreadident == 0 or self._mainthreadident == ident:
             ec._signals_enabled = 1    # the main thread is enabled
             self._mainthreadident = ident
-        self._valuedict[ident] = ec
-        # This logic relies on hacks and _make_sure_does_not_move().
-        # It only works because we keep the 'ec' alive in '_valuedict' too.
+        if register_in_valuedict:
+            self._valuedict[ident] = ec
         self.raw_thread_local.set(ec)
 
     def leave_thread(self, space):
@@ -84,7 +140,27 @@
         ec._signals_enabled = new
 
     def getallvalues(self):
-        return self._valuedict
+        if self._weaklist is None:
+            return self._valuedict
+        # This logic walks the 'self._weaklist' list and adds the
+        # ExecutionContexts to 'result'.  We are careful in case there
+        # are two AutoFreeECWrappers in the list which have the same
+        # 'ident'; in this case we must keep the most recent one (the
+        # older one should be deleted soon).  Moreover, entries in
+        # self._valuedict have priority because they are never
+        # outdated.
+        result = {}
+        generations = {}
+        for h in self._weaklist.get_all_handles():
+            wrapper = h()
+            if wrapper is not None:
+                key = wrapper.ident
+                prev = generations.get(key, r_ulonglong(0))
+                if wrapper.generation > prev:   # implies '.generation != 0'
+                    generations[key] = wrapper.generation
+                    result[key] = wrapper.ec
+        result.update(self._valuedict)
+        return result
 
     def reinit_threads(self, space):
         "Called in the child process after a fork()"
@@ -94,7 +170,31 @@
         old_sig = ec._signals_enabled
         if ident != self._mainthreadident:
             old_sig += 1
-        self._cleanup_()
+        self._cleanup_()      # clears self._valuedict
         self._mainthreadident = ident
         self._set_ec(ec)
         ec._signals_enabled = old_sig
+
+
+class AutoFreeECWrapper(object):
+
+    def __init__(self, threadlocals, ec):
+        # this makes a loop between 'self' and 'ec'.  It should not prevent
+        # the __del__ method here from being called.
+        threadlocals._next_generation += 1
+        self.generation = threadlocals._next_generation
+        self.ec = ec
+        ec._threadlocals_auto_free = self
+        self.ident = rthread.get_ident()
+
+    def __del__(self):
+        from pypy.module.thread.os_local import thread_is_stopping
+        # this is always called in another thread: the thread
+        # referenced by 'self.ec' has finished at that point, and
+        # we're just after the GC which finds no more references to
+        # 'ec' (and thus to 'self').
+        self.generation = r_ulonglong(0)
+        thread_is_stopping(self.ec)
+
+class ListECWrappers(rweaklist.RWeakListMixin):
+    pass
diff --git a/rpython/jit/backend/llgraph/runner.py b/rpython/jit/backend/llgraph/runner.py
--- a/rpython/jit/backend/llgraph/runner.py
+++ b/rpython/jit/backend/llgraph/runner.py
@@ -13,6 +13,7 @@
 
 from rpython.rtyper.llinterp import LLInterpreter, LLException
 from rpython.rtyper.lltypesystem import lltype, llmemory, rffi, rstr
+from rpython.rtyper.lltypesystem.lloperation import llop
 from rpython.rtyper import rclass
 
 from rpython.rlib.clibffi import FFI_DEFAULT_ABI
@@ -638,18 +639,9 @@
         return array.getlength()
 
     def bh_getarrayitem_gc(self, a, index, descr):
+        a = support.cast_arg(lltype.Ptr(descr.A), a)
+        array = a._obj
         assert index >= 0
-        if descr.A is descr.OUTERA:
-            a = support.cast_arg(lltype.Ptr(descr.A), a)
-        else:
-            # we use rffi.cast instead of support.cast_arg because the types
-            # might not be "compatible" enough from the lltype point of
-            # view. In particular, this happens when we use
-            # str_storage_getitem, in which an rpy_string is casted to
-            # rpy_string_as_Signed (or similar)
-            a = rffi.cast(lltype.Ptr(descr.OUTERA), a)
-            a = getattr(a, descr.OUTERA._arrayfld)
-        array = a._obj
         return support.cast_result(descr.A.OF, array.getitem(index))
 
     bh_getarrayitem_gc_pure_i = bh_getarrayitem_gc
@@ -714,6 +706,24 @@
         else:
             return self.bh_raw_load_i(struct, offset, descr)
 
+    def bh_gc_load_indexed_i(self, struct, index, scale, base_ofs, bytes):
+        if   bytes == 1: T = rffi.UCHAR
+        elif bytes == 2: T = rffi.USHORT
+        elif bytes == 4: T = rffi.UINT
+        elif bytes == 8: T = rffi.ULONGLONG
+        elif bytes == -1: T = rffi.SIGNEDCHAR
+        elif bytes == -2: T = rffi.SHORT
+        elif bytes == -4: T = rffi.INT
+        elif bytes == -8: T = rffi.LONGLONG
+        else: raise NotImplementedError(bytes)
+        x = llop.gc_load_indexed(T, struct, index, scale, base_ofs)
+        return lltype.cast_primitive(lltype.Signed, x)
+
+    def bh_gc_load_indexed_f(self, struct, index, scale, base_ofs, bytes):
+        if bytes != 8:
+            raise Exception("gc_load_indexed_f is only for 'double'!")
+        return llop.gc_load_indexed(rffi.DOUBLE, struct, index, scale, base_ofs)
+
     def bh_increment_debug_counter(self, addr):
         p = rffi.cast(rffi.CArrayPtr(lltype.Signed), addr)
         p[0] += 1
diff --git a/rpython/jit/backend/llsupport/llmodel.py b/rpython/jit/backend/llsupport/llmodel.py
--- a/rpython/jit/backend/llsupport/llmodel.py
+++ b/rpython/jit/backend/llsupport/llmodel.py
@@ -725,6 +725,16 @@
     def bh_raw_load_f(self, addr, offset, descr):
         return self.read_float_at_mem(addr, offset)
 
+    def bh_gc_load_indexed_i(self, addr, index, scale, base_ofs, bytes):
+        offset = base_ofs + scale * index
+        return self.read_int_at_mem(addr, offset, abs(bytes), bytes < 0)
+
+    def bh_gc_load_indexed_f(self, addr, index, scale, base_ofs, bytes):
+        # only for 'double'!
+        assert bytes == rffi.sizeof(lltype.Float)
+        offset = base_ofs + scale * index
+        return self.read_float_at_mem(addr, offset)
+
     def bh_new(self, sizedescr):
         return self.gc_ll_descr.gc_malloc(sizedescr)
 
diff --git a/rpython/jit/codewriter/jtransform.py b/rpython/jit/codewriter/jtransform.py
--- a/rpython/jit/codewriter/jtransform.py
+++ b/rpython/jit/codewriter/jtransform.py
@@ -1021,18 +1021,20 @@
             kind = getkind(op.result.concretetype)[0]
             return SpaceOperation('getinteriorfield_gc_%s' % kind, args,
                                   op.result)
-        elif isinstance(op.args[0].concretetype.TO, lltype.GcStruct):
-            # special-case 2: GcStruct with Array field
-            v_inst, c_field, v_index = op.args
-            STRUCT = v_inst.concretetype.TO
-            ARRAY = getattr(STRUCT, c_field.value)
-            assert isinstance(ARRAY, lltype.Array)
-            arraydescr = self.cpu.arraydescrof(STRUCT)
-            kind = getkind(op.result.concretetype)[0]
-            assert kind in ('i', 'f')
-            return SpaceOperation('getarrayitem_gc_%s' % kind,
-                                  [op.args[0], v_index, arraydescr],
-                                  op.result)
+        #elif isinstance(op.args[0].concretetype.TO, lltype.GcStruct):
+        #    # special-case 2: GcStruct with Array field
+        #    ---was added in the faster-rstruct branch,---
+        #    ---no longer directly supported---
+        #    v_inst, c_field, v_index = op.args
+        #    STRUCT = v_inst.concretetype.TO
+        #    ARRAY = getattr(STRUCT, c_field.value)
+        #    assert isinstance(ARRAY, lltype.Array)
+        #    arraydescr = self.cpu.arraydescrof(STRUCT)
+        #    kind = getkind(op.result.concretetype)[0]
+        #    assert kind in ('i', 'f')
+        #    return SpaceOperation('getarrayitem_gc_%s' % kind,
+        #                          [op.args[0], v_index, arraydescr],
+        #                          op.result)
         else:
             assert False, 'not supported'
 
@@ -1084,6 +1086,25 @@
         return SpaceOperation('raw_load_%s' % kind,
                               [op.args[0], op.args[1], descr], op.result)
 
+    def rewrite_op_gc_load_indexed(self, op):
+        T = op.result.concretetype
+        kind = getkind(T)[0]
+        assert kind != 'r'
+        descr = self.cpu.arraydescrof(rffi.CArray(T))
+        if (not isinstance(op.args[2], Constant) or
+            not isinstance(op.args[3], Constant)):
+            raise NotImplementedError("gc_load_indexed: 'scale' and 'base_ofs'"
+                                      " should be constants")
+        # xxx hard-code the size in bytes at translation time, which is
+        # probably fine and avoids lots of issues later
+        bytes = descr.get_item_size_in_bytes()
+        if descr.is_item_signed():
+            bytes = -bytes
+        c_bytes = Constant(bytes, lltype.Signed)
+        return SpaceOperation('gc_load_indexed_%s' % kind,
+                              [op.args[0], op.args[1],
+                               op.args[2], op.args[3], c_bytes], op.result)
+
     def _rewrite_equality(self, op, opname):
         arg0, arg1 = op.args
         if isinstance(arg0, Constant) and not arg0.value:
diff --git a/rpython/jit/metainterp/blackhole.py b/rpython/jit/metainterp/blackhole.py
--- a/rpython/jit/metainterp/blackhole.py
+++ b/rpython/jit/metainterp/blackhole.py
@@ -1434,6 +1434,13 @@
     def bhimpl_raw_load_f(cpu, addr, offset, arraydescr):
         return cpu.bh_raw_load_f(addr, offset, arraydescr)
 
+    @arguments("cpu", "r", "i", "i", "i", "i", returns="i")
+    def bhimpl_gc_load_indexed_i(cpu, addr, index, scale, base_ofs, bytes):
+        return cpu.bh_gc_load_indexed_i(addr, index,scale,base_ofs, bytes)
+    @arguments("cpu", "r", "i", "i", "i", "i", returns="f")
+    def bhimpl_gc_load_indexed_f(cpu, addr, index, scale, base_ofs, bytes):
+        return cpu.bh_gc_load_indexed_f(addr, index,scale,base_ofs, bytes)
+
     @arguments("r", "d", "d")
     def bhimpl_record_quasiimmut_field(struct, fielddescr, mutatefielddescr):
         pass
diff --git a/rpython/jit/metainterp/optimizeopt/heap.py b/rpython/jit/metainterp/optimizeopt/heap.py
--- a/rpython/jit/metainterp/optimizeopt/heap.py
+++ b/rpython/jit/metainterp/optimizeopt/heap.py
@@ -535,16 +535,10 @@
         cf.do_setfield(self, op)
 
     def optimize_GETARRAYITEM_GC_I(self, op):
-        # When using str_storage_getitem it might happen that op.getarg(0) is
-        # a virtual string, NOT an array. In that case, we cannot cache the
-        # getarrayitem as if it were an array, obviously. In theory we could
-        # improve by writing special code to interpter the buffer of the
-        # virtual string as if it were an array, but it looks complicate,
-        # fragile and not worth it.
         arrayinfo = self.ensure_ptr_info_arg0(op)
         indexb = self.getintbound(op.getarg(1))
         cf = None
-        if indexb.is_constant() and not arrayinfo.is_vstring():
+        if indexb.is_constant():
             index = indexb.getint()
             arrayinfo.getlenbound(None).make_gt_const(index)
             # use the cache on (arraydescr, index), which is a constant
@@ -561,7 +555,7 @@
         self.make_nonnull(op.getarg(0))
         self.emit_operation(op)
         # the remember the result of reading the array item
-        if cf is not None and not arrayinfo.is_vstring():
+        if cf is not None:
             arrayinfo.setitem(op.getdescr(), indexb.getint(),
                               self.get_box_replacement(op.getarg(0)),
                               self.get_box_replacement(op), cf,
diff --git a/rpython/jit/metainterp/optimizeopt/info.py b/rpython/jit/metainterp/optimizeopt/info.py
--- a/rpython/jit/metainterp/optimizeopt/info.py
+++ b/rpython/jit/metainterp/optimizeopt/info.py
@@ -24,9 +24,6 @@
     def is_virtual(self):
         return False
 
-    def is_vstring(self):
-        return False
-
     def is_precise(self):
         return False
 
diff --git a/rpython/jit/metainterp/optimizeopt/virtualize.py b/rpython/jit/metainterp/optimizeopt/virtualize.py
--- a/rpython/jit/metainterp/optimizeopt/virtualize.py
+++ b/rpython/jit/metainterp/optimizeopt/virtualize.py
@@ -277,10 +277,8 @@
             self.emit_operation(op)
 
     def optimize_GETARRAYITEM_GC_I(self, op):
-        # When using str_storage_getitem we op.getarg(0) is a string, NOT an
-        # array, hence the check. In that case, it will be forced
         opinfo = self.getptrinfo(op.getarg(0))
-        if opinfo and opinfo.is_virtual() and not opinfo.is_vstring():
+        if opinfo and opinfo.is_virtual():
             indexbox = self.get_constant_box(op.getarg(1))
             if indexbox is not None:
                 item = opinfo.getitem(op.getdescr(), indexbox.getint())
diff --git a/rpython/jit/metainterp/optimizeopt/vstring.py b/rpython/jit/metainterp/optimizeopt/vstring.py
--- a/rpython/jit/metainterp/optimizeopt/vstring.py
+++ b/rpython/jit/metainterp/optimizeopt/vstring.py
@@ -62,9 +62,6 @@
         self.mode = mode
         self.length = length
 
-    def is_vstring(self):
-        return True
-
     def getlenbound(self, mode):
         from rpython.jit.metainterp.optimizeopt import intutils
 
diff --git a/rpython/jit/metainterp/pyjitpl.py b/rpython/jit/metainterp/pyjitpl.py
--- a/rpython/jit/metainterp/pyjitpl.py
+++ b/rpython/jit/metainterp/pyjitpl.py
@@ -810,6 +810,27 @@
         return self.execute_with_descr(rop.RAW_LOAD_F, arraydescr,
                                        addrbox, offsetbox)
 
+    def _remove_symbolics(self, c):
+        if not we_are_translated():
+            from rpython.rtyper.lltypesystem import ll2ctypes
+            assert isinstance(c, ConstInt)
+            c = ConstInt(ll2ctypes.lltype2ctypes(c.value))
+        return c
+
+    @arguments("box", "box", "box", "box", "box")
+    def opimpl_gc_load_indexed_i(self, addrbox, indexbox,
+                                 scalebox, baseofsbox, bytesbox):
+        return self.execute(rop.GC_LOAD_INDEXED_I, addrbox, indexbox,
+                            self._remove_symbolics(scalebox),
+                            self._remove_symbolics(baseofsbox), bytesbox)
+
+    @arguments("box", "box", "box", "box", "box")
+    def opimpl_gc_load_indexed_f(self, addrbox, indexbox,
+                                 scalebox, baseofsbox, bytesbox):
+        return self.execute(rop.GC_LOAD_INDEXED_F, addrbox, indexbox,
+                            self._remove_symbolics(scalebox),
+                            self._remove_symbolics(baseofsbox), bytesbox)
+
     @arguments("box")
     def opimpl_hint_force_virtualizable(self, box):
         self.metainterp.gen_store_back_in_vable(box)
diff --git a/rpython/jit/metainterp/test/test_strstorage.py b/rpython/jit/metainterp/test/test_strstorage.py
--- a/rpython/jit/metainterp/test/test_strstorage.py
+++ b/rpython/jit/metainterp/test/test_strstorage.py
@@ -19,7 +19,7 @@
         res = self.interp_operations(f, [], supports_singlefloats=True)
         #
         kind = getkind(TYPE)[0] # 'i' or 'f'
-        self.check_operations_history({'getarrayitem_gc_%s' % kind: 1,
+        self.check_operations_history({'gc_load_indexed_%s' % kind: 1,
                                        'finish': 1})
         #
         if TYPE == lltype.SingleFloat:
@@ -29,8 +29,8 @@
             return longlong.int2singlefloat(res)
         return res
 
-    def str_storage_supported(self, TYPE):
-        py.test.skip('this is not a JIT test')
+    #def str_storage_supported(self, TYPE):
+    #    py.test.skip('this is not a JIT test')
 
     def test_force_virtual_str_storage(self):
         byteorder = sys.byteorder
@@ -48,6 +48,6 @@
             'strsetitem': 1,          # str forcing
             'call_pure_r': 1,         # str forcing (copystrcontent)
             'guard_no_exception': 1,  # str forcing
-            'getarrayitem_gc_i': 1,   # str_storage_getitem
+            'gc_load_indexed_i': 1,   # str_storage_getitem
             'finish': 1
             })
diff --git a/rpython/rlib/buffer.py b/rpython/rlib/buffer.py
--- a/rpython/rlib/buffer.py
+++ b/rpython/rlib/buffer.py
@@ -97,6 +97,17 @@
 
     def __init__(self, buffer, offset, size):
         self.readonly = buffer.readonly
+        if isinstance(buffer, SubBuffer):     # don't nest them
+            # we want a view (offset, size) over a view
+            # (buffer.offset, buffer.size) over buffer.buffer
+            at_most = buffer.size - offset
+            if size > at_most:
+                if at_most < 0:
+                    at_most = 0
+                size = at_most
+            offset += buffer.offset
+            buffer = buffer.buffer
+        #
         self.buffer = buffer
         self.offset = offset
         self.size = size
diff --git a/rpython/rlib/objectmodel.py b/rpython/rlib/objectmodel.py
--- a/rpython/rlib/objectmodel.py
+++ b/rpython/rlib/objectmodel.py
@@ -335,6 +335,25 @@
     # XXX this can be made more efficient in the future
     return bytearray(str(i))
 
+def fetch_translated_config():
+    """Returns the config that is current when translating.
+    Returns None if not translated.
+    """
+    return None
+
+class Entry(ExtRegistryEntry):
+    _about_ = fetch_translated_config
+
+    def compute_result_annotation(self):
+        config = self.bookkeeper.annotator.translator.config
+        return self.bookkeeper.immutablevalue(config)
+
+    def specialize_call(self, hop):
+        from rpython.rtyper.lltypesystem import lltype
+        translator = hop.rtyper.annotator.translator
+        hop.exception_cannot_occur()
+        return hop.inputconst(lltype.Void, translator.config)
+
 # ____________________________________________________________
 
 class FREED_OBJECT(object):
diff --git a/rpython/rlib/rstruct/nativefmttable.py b/rpython/rlib/rstruct/nativefmttable.py
--- a/rpython/rlib/rstruct/nativefmttable.py
+++ b/rpython/rlib/rstruct/nativefmttable.py
@@ -11,7 +11,6 @@
 from rpython.rlib.rstruct.standardfmttable import native_is_bigendian
 from rpython.rlib.rstruct.error import StructError
 from rpython.rlib.unroll import unrolling_iterable
-from rpython.rlib.strstorage import str_storage_getitem
 from rpython.rtyper.lltypesystem import lltype, rffi
 from rpython.rtyper.tool import rffi_platform
 from rpython.translator.tool.cbuild import ExternalCompilationInfo
diff --git a/rpython/rlib/rstruct/standardfmttable.py b/rpython/rlib/rstruct/standardfmttable.py
--- a/rpython/rlib/rstruct/standardfmttable.py
+++ b/rpython/rlib/rstruct/standardfmttable.py
@@ -12,7 +12,7 @@
 from rpython.rlib.rstruct import ieee
 from rpython.rlib.rstruct.error import StructError, StructOverflowError
 from rpython.rlib.unroll import unrolling_iterable
-from rpython.rlib.strstorage import str_storage_getitem, str_storage_supported
+from rpython.rlib.strstorage import str_storage_getitem
 from rpython.rlib import rarithmetic
 from rpython.rtyper.lltypesystem import rffi
 
@@ -185,13 +185,14 @@
             data = fmtiter.read(size)
             fmtiter.appendobj(ieee.unpack_float(data, fmtiter.bigendian))
             return
-        if not str_storage_supported(TYPE):
-            # this happens e.g. on win32 and ARM32: we cannot read the string
-            # content as an array of doubles because it's not properly
-            # aligned. But we can read a longlong and convert to float
-            assert TYPE == rffi.DOUBLE
-            assert rffi.sizeof(TYPE) == 8
-            return unpack_longlong2float(fmtiter)
+        ## XXX check if the following code is still needed
+        ## if not str_storage_supported(TYPE):
+        ##     # this happens e.g. on win32 and ARM32: we cannot read the string
+        ##     # content as an array of doubles because it's not properly
+        ##     # aligned. But we can read a longlong and convert to float
+        ##     assert TYPE == rffi.DOUBLE
+        ##     assert rffi.sizeof(TYPE) == 8
+        ##     return unpack_longlong2float(fmtiter)
         try:
             # fast path
             val = unpack_fastpath(TYPE)(fmtiter)
@@ -246,7 +247,7 @@
 
     @specialize.argtype(0)
     def unpack_int_fastpath_maybe(fmtiter):
-        if fmtiter.bigendian != native_is_bigendian or not str_storage_supported(TYPE):
+        if fmtiter.bigendian != native_is_bigendian or not native_is_ieee754: ## or not str_storage_supported(TYPE):
             return False
         try:
             intvalue = unpack_fastpath(TYPE)(fmtiter)
diff --git a/rpython/rlib/rthread.py b/rpython/rlib/rthread.py
--- a/rpython/rlib/rthread.py
+++ b/rpython/rlib/rthread.py
@@ -291,8 +291,6 @@
 # ____________________________________________________________
 #
 # Thread-locals.
-# KEEP THE REFERENCE ALIVE, THE GC DOES NOT FOLLOW THEM SO FAR!
-# We use _make_sure_does_not_move() to make sure the pointer will not move.
 
 
 class ThreadLocalField(object):
@@ -351,6 +349,11 @@
 
 
 class ThreadLocalReference(ThreadLocalField):
+    # A thread-local that points to an object.  The object stored in such
+    # a thread-local is kept alive as long as the thread is not finished
+    # (but only with our own GCs!  it seems not to work with Boehm...)
+    # (also, on Windows, if you're not making a DLL but an EXE, it will
+    # leak the objects when a thread finishes; see threadlocal.c.)
     _COUNT = 1
 
     def __init__(self, Cls, loop_invariant=False):
@@ -378,20 +381,36 @@
             assert isinstance(value, Cls) or value is None
             if we_are_translated():
                 from rpython.rtyper.annlowlevel import cast_instance_to_gcref
-                from rpython.rlib.rgc import _make_sure_does_not_move
-                from rpython.rlib.objectmodel import running_on_llinterp
                 gcref = cast_instance_to_gcref(value)
-                if not running_on_llinterp:
-                    if gcref:
-                        _make_sure_does_not_move(gcref)
                 value = lltype.cast_ptr_to_int(gcref)
                 setraw(value)
+                rgc.register_custom_trace_hook(TRACETLREF, _lambda_trace_tlref)
+                rgc.ll_writebarrier(_tracetlref_obj)
             else:
                 self.local.value = value
 
         self.get = get
         self.set = set
 
+        def _trace_tlref(gc, obj, callback, arg):
+            p = llmemory.NULL
+            while True:
+                p = llop.threadlocalref_enum(llmemory.Address, p)
+                if not p:
+                    break
+                gc._trace_callback(callback, arg, p + offset)
+        _lambda_trace_tlref = lambda: _trace_tlref
+        TRACETLREF = lltype.GcStruct('TRACETLREF')
+        _tracetlref_obj = lltype.malloc(TRACETLREF, immortal=True)
+
+    @staticmethod
+    def automatic_keepalive(config):
+        """Returns True if translated with a GC that keeps alive
+        the set() value until the end of the thread.  Returns False
+        if you need to keep it alive yourself.
+        """
+        return config.translation.gctransformer == "framework"
+
 
 tlfield_thread_ident = ThreadLocalField(lltype.Signed, "thread_ident",
                                         loop_invariant=True)
diff --git a/rpython/rlib/strstorage.py b/rpython/rlib/strstorage.py
--- a/rpython/rlib/strstorage.py
+++ b/rpython/rlib/strstorage.py
@@ -9,54 +9,31 @@
 #      rstr.py:copy_string_contents), which has no chance to work during
 #      tracing
 #
-#   2. use llop.raw_load: despite the name, llop.raw_load DOES support reading
-#      from GC pointers. However:
-#
-#        a. we would like to use a CompositeOffset as the offset (using the
-#           same logic as in rstr.py:_get_raw_str_buf), but this is not (yet)
-#           supported before translation: it works only if you pass an actual
-#           integer
-#
-#        b. raw_load from a GC pointer is not (yet) supported by the
-#           JIT. There are plans to introduce a gc_load operation: when it
-#           will be there, we could fix the issue above and actually use it to
-#           implement str_storage_getitem
-#
-#   3. the actual solution: cast rpy_string to a GcStruct which has the very
+#   2. cast rpy_string to a GcStruct which has the very
 #      same layout, with the only difference that its 'chars' field is no
 #      longer an Array(Char) but e.e. an Array(Signed). Then, we just need to
-#      read the appropriate index into the array
+#      read the appropriate index into the array.  To support this solution,
+#      the JIT's optimizer needed a few workarounds.  This was removed.
+#
+#   3. use the newly introduced 'llop.gc_load_indexed'.
+#
 
-from rpython.rtyper.lltypesystem import lltype, rffi, llmemory
-from rpython.rtyper.lltypesystem.rstr import STR, _get_raw_str_buf
+
+from rpython.rtyper.lltypesystem import lltype, llmemory
+from rpython.rtyper.lltypesystem.lloperation import llop
+from rpython.rtyper.lltypesystem.rstr import STR
 from rpython.rtyper.annlowlevel import llstr
-from rpython.rlib.objectmodel import specialize, we_are_translated
+from rpython.rlib.objectmodel import specialize
 
-@specialize.memo()
-def _rpy_string_as_type(TP):
-    # sanity check that STR is actually what we think it is
-    assert STR._flds == {
-        'hash': lltype.Signed,
-        'chars': lltype.Array(lltype.Char, hints={'immutable': True})
-        }
-    STR_AS_TP = lltype.GcStruct('rpy_string_as_%s' % TP,
-                                ('hash',  lltype.Signed),
-                                ('chars', lltype.Array(TP, hints={'immutable': True})))
-    return STR_AS_TP
-
-@specialize.arg(0)
-def str_storage_supported(TP):
-    # on some architectures (e.g. win32 and arm32) an array of longlongs needs
-    # to be aligned at 8 bytes boundaries, so we cannot safely cast from STR
-    # to STR_AS_TP. In that case, we str_storage_getitem is simply not
-    # supported
-    return rffi.sizeof(TP) <= rffi.sizeof(lltype.Signed)
 
 @specialize.ll()
-def str_storage_getitem(TP, s, index):
-    assert str_storage_supported(TP) # sanity check
-    STR_AS_TP = _rpy_string_as_type(TP)
+def str_storage_getitem(TP, s, byte_offset):
+    # WARNING: the 'byte_offset' is, as its name says, measured in bytes;
+    # however, it should be aligned for TP, otherwise on some platforms this
+    # code will crash!
     lls = llstr(s)
-    str_as_tp = rffi.cast(lltype.Ptr(STR_AS_TP), lls)
-    index = index / rffi.sizeof(TP)
-    return str_as_tp.chars[index]
+    base_ofs = (llmemory.offsetof(STR, 'chars') +
+                llmemory.itemoffsetof(STR.chars, 0))
+    scale_factor = llmemory.sizeof(lltype.Char)
+    return llop.gc_load_indexed(TP, lls, byte_offset,
+                                scale_factor, base_ofs)
diff --git a/rpython/rlib/test/test_buffer.py b/rpython/rlib/test/test_buffer.py
--- a/rpython/rlib/test/test_buffer.py
+++ b/rpython/rlib/test/test_buffer.py
@@ -45,3 +45,9 @@
     ssbuf = SubBuffer(sbuf, 3, 2)
     assert ssbuf.getslice(0, 2, 1, 2) == 'ld'
     assert ssbuf.as_str_and_offset_maybe() == ('hello world', 9)
+
+def test_repeated_subbuffer():
+    buf = StringBuffer('x' * 10000)
+    for i in range(9999, 9, -1):
+        buf = SubBuffer(buf, 1, i)
+    assert buf.getlength() == 10
diff --git a/rpython/rlib/test/test_objectmodel.py b/rpython/rlib/test/test_objectmodel.py
--- a/rpython/rlib/test/test_objectmodel.py
+++ b/rpython/rlib/test/test_objectmodel.py
@@ -6,7 +6,8 @@
     prepare_dict_update, reversed_dict, specialize, enforceargs, newlist_hint,
     resizelist_hint, is_annotation_constant, always_inline, NOT_CONSTANT,
     iterkeys_with_hash, iteritems_with_hash, contains_with_hash,
-    setitem_with_hash, getitem_with_hash, delitem_with_hash, import_from_mixin)
+    setitem_with_hash, getitem_with_hash, delitem_with_hash, import_from_mixin,
+    fetch_translated_config)
 from rpython.translator.translator import TranslationContext, graphof
 from rpython.rtyper.test.tool import BaseRtypingTest
 from rpython.rtyper.test.test_llinterp import interpret
@@ -439,6 +440,13 @@
         res = self.interpret(f, [42])
         assert res == 84
 
+    def test_fetch_translated_config(self):
+        assert fetch_translated_config() is None
+        def f():
+            return fetch_translated_config().translation.continuation
+        res = self.interpret(f, [])
+        assert res is False
+
 
 def test_specialize_decorator():
     def f():
diff --git a/rpython/rlib/test/test_rthread.py b/rpython/rlib/test/test_rthread.py
--- a/rpython/rlib/test/test_rthread.py
+++ b/rpython/rlib/test/test_rthread.py
@@ -1,6 +1,7 @@
 import gc, time
 from rpython.rlib.rthread import *
 from rpython.rlib.rarithmetic import r_longlong
+from rpython.rlib import objectmodel
 from rpython.translator.c.test.test_boehm import AbstractGCTestClass
 from rpython.rtyper.lltypesystem import lltype, rffi
 import py
@@ -240,3 +241,60 @@
 
 class TestUsingFramework(AbstractThreadTests):
     gcpolicy = 'minimark'
+
+    def test_tlref_keepalive(self, no__thread=True):
+        import weakref
+        from rpython.config.translationoption import SUPPORT__THREAD
+
+        if not (SUPPORT__THREAD or no__thread):
+            py.test.skip("no __thread support here")
+
+        class FooBar(object):
+            pass
+        t = ThreadLocalReference(FooBar)
+
+        def tset():
+            x1 = FooBar()
+            t.set(x1)
+            return weakref.ref(x1)
+        tset._dont_inline_ = True
+
+        class WrFromThread:
+            pass
+        wr_from_thread = WrFromThread()
+
+        def f():
+            config = objectmodel.fetch_translated_config()
+            assert t.automatic_keepalive(config) is True
+            wr = tset()
+            import gc; gc.collect()   # 'x1' should not be collected
+            x2 = t.get()
+            assert x2 is not None
+            assert wr() is not None
+            assert wr() is x2
+            return wr
+
+        def thread_entry_point():
+            wr = f()
+            wr_from_thread.wr = wr
+            wr_from_thread.seen = True
+
+        def main():
+            wr_from_thread.seen = False
+            start_new_thread(thread_entry_point, ())
+            wr1 = f()
+            time.sleep(0.5)
+            assert wr_from_thread.seen is True
+            wr2 = wr_from_thread.wr
+            import gc; gc.collect()      # wr2() should be collected here
+            assert wr1() is not None     # this thread, still running
+            assert wr2() is None         # other thread, not running any more
+            return 42
+
+        extra_options = {'no__thread': no__thread, 'shared': True}
+        fn = self.getcompiled(main, [], extra_options=extra_options)
+        res = fn()
+        assert res == 42
+
+    def test_tlref_keepalive__thread(self):
+        self.test_tlref_keepalive(no__thread=False)
diff --git a/rpython/rlib/test/test_strstorage.py b/rpython/rlib/test/test_strstorage.py
--- a/rpython/rlib/test/test_strstorage.py
+++ b/rpython/rlib/test/test_strstorage.py
@@ -2,7 +2,7 @@
 import sys
 import struct
 from rpython.rtyper.lltypesystem import lltype, rffi
-from rpython.rlib.strstorage import str_storage_getitem, str_storage_supported
+from rpython.rlib.strstorage import str_storage_getitem
 from rpython.rlib.rarithmetic import r_singlefloat
 from rpython.rtyper.test.tool import BaseRtypingTest
 
@@ -10,14 +10,14 @@
 
 class BaseStrStorageTest:
 
-    def test_str_getitem_supported(self):
-        if IS_32BIT:
-            expected = False
-        else:
-            expected = True
-        #
-        assert self.str_storage_supported(rffi.LONGLONG) == expected
-        assert self.str_storage_supported(rffi.DOUBLE) == expected
+    ## def test_str_getitem_supported(self):
+    ##     if IS_32BIT:
+    ##         expected = False
+    ##     else:
+    ##         expected = True
+    ##     #
+    ##     assert self.str_storage_supported(rffi.LONGLONG) == expected
+    ##     assert self.str_storage_supported(rffi.DOUBLE) == expected
 
     def test_signed(self):
         buf = struct.pack('@ll', 42, 43)
@@ -34,8 +34,8 @@
         assert int(x) == 43
 
     def test_float(self):
-        if not str_storage_supported(lltype.Float):
-            py.test.skip('str_storage_getitem(lltype.Float) not supported on this machine')
+        ## if not str_storage_supported(lltype.Float):
+        ##     py.test.skip('str_storage_getitem(lltype.Float) not supported on this machine')
         buf = struct.pack('@dd', 12.3, 45.6)
         size = struct.calcsize('@d')
         assert self.str_storage_getitem(lltype.Float, buf, 0) == 12.3
@@ -52,20 +52,45 @@
 
 class TestDirect(BaseStrStorageTest):
 
-    def str_storage_supported(self, TYPE):
-        return str_storage_supported(TYPE)
+    ## def str_storage_supported(self, TYPE):
+    ##     return str_storage_supported(TYPE)
 
     def str_storage_getitem(self, TYPE, buf, offset):
         return str_storage_getitem(TYPE, buf, offset)
 
 class TestRTyping(BaseStrStorageTest, BaseRtypingTest):
 
-    def str_storage_supported(self, TYPE):
-        def fn():
-            return str_storage_supported(TYPE)
-        return self.interpret(fn, [])
+    ## def str_storage_supported(self, TYPE):
+    ##     def fn():
+    ##         return str_storage_supported(TYPE)
+    ##     return self.interpret(fn, [])
 
     def str_storage_getitem(self, TYPE, buf, offset):
         def fn(offset):
             return str_storage_getitem(TYPE, buf, offset)
         return self.interpret(fn, [offset])
+
+
+class TestCompiled(BaseStrStorageTest):
+    cache = {}
+
+    def str_storage_getitem(self, TYPE, buf, offset):
+        if TYPE not in self.cache:
+            from rpython.translator.c.test.test_genc import compile
+
+            assert isinstance(TYPE, lltype.Primitive)
+            if TYPE in (lltype.Float, lltype.SingleFloat):
+                TARGET_TYPE = lltype.Float
+            else:
+                TARGET_TYPE = lltype.Signed
+
+            def llf(buf, offset):
+                x = str_storage_getitem(TYPE, buf, offset)
+                return lltype.cast_primitive(TARGET_TYPE, x)
+
+            fn = compile(llf, [str, int])
+            self.cache[TYPE] = fn
+        #
+        fn = self.cache[TYPE]
+        x = fn(buf, offset)
+        return lltype.cast_primitive(TYPE, x)
diff --git a/rpython/rtyper/llinterp.py b/rpython/rtyper/llinterp.py
--- a/rpython/rtyper/llinterp.py
+++ b/rpython/rtyper/llinterp.py
@@ -950,6 +950,9 @@
         return self.op_raw_load(RESTYPE, _address_of_thread_local(), offset)
     op_threadlocalref_get.need_result_type = True
 
+    def op_threadlocalref_enum(self, prev):
+        raise NotImplementedError
+
     # __________________________________________________________
     # operations on addresses
 
diff --git a/rpython/rtyper/lltypesystem/ll2ctypes.py b/rpython/rtyper/lltypesystem/ll2ctypes.py
--- a/rpython/rtyper/lltypesystem/ll2ctypes.py
+++ b/rpython/rtyper/lltypesystem/ll2ctypes.py
@@ -902,6 +902,14 @@
                 llobj = ctypes.sizeof(get_ctypes_type(llobj.TYPE)) * llobj.repeat
             elif isinstance(llobj, ComputedIntSymbolic):
                 llobj = llobj.compute_fn()
+            elif isinstance(llobj, llmemory.CompositeOffset):
+                llobj = sum([lltype2ctypes(c) for c in llobj.offsets])
+            elif isinstance(llobj, llmemory.FieldOffset):
+                CSTRUCT = get_ctypes_type(llobj.TYPE)
+                llobj = getattr(CSTRUCT, llobj.fldname).offset
+            elif isinstance(llobj, llmemory.ArrayItemsOffset):
+                CARRAY = get_ctypes_type(llobj.TYPE)
+                llobj = CARRAY.items.offset
             else:
                 raise NotImplementedError(llobj)  # don't know about symbolic value
 
diff --git a/rpython/rtyper/lltypesystem/lloperation.py b/rpython/rtyper/lltypesystem/lloperation.py
--- a/rpython/rtyper/lltypesystem/lloperation.py
+++ b/rpython/rtyper/lltypesystem/lloperation.py
@@ -417,6 +417,7 @@
     'raw_load':             LLOp(sideeffects=False, canrun=True),
     'raw_store':            LLOp(canrun=True),
     'bare_raw_store':       LLOp(),
+    'gc_load_indexed':      LLOp(sideeffects=False, canrun=True),
     'stack_malloc':         LLOp(), # mmh
     'track_alloc_start':    LLOp(),
     'track_alloc_stop':     LLOp(),
@@ -544,8 +545,9 @@
     'getslice':             LLOp(canraise=(Exception,)),
     'check_and_clear_exc':  LLOp(),
 
-    'threadlocalref_addr':  LLOp(sideeffects=False),  # get (or make) addr of tl
+    'threadlocalref_addr':  LLOp(),                   # get (or make) addr of tl
     'threadlocalref_get':   LLOp(sideeffects=False),  # read field (no check)
+    'threadlocalref_enum':  LLOp(sideeffects=False),  # enum all threadlocalrefs
 
     # __________ debugging __________
     'debug_view':           LLOp(),
diff --git a/rpython/rtyper/lltypesystem/opimpl.py b/rpython/rtyper/lltypesystem/opimpl.py
--- a/rpython/rtyper/lltypesystem/opimpl.py
+++ b/rpython/rtyper/lltypesystem/opimpl.py
@@ -702,6 +702,17 @@
     return p[0]
 op_raw_load.need_result_type = True
 
+def op_gc_load_indexed(TVAL, p, index, scale, base_ofs):
+    # 'base_ofs' should be a CompositeOffset(..., ArrayItemsOffset).
+    # 'scale' should be a llmemory.sizeof().
+    from rpython.rtyper.lltypesystem import rffi
+    ofs = base_ofs + scale * index
+    if isinstance(ofs, int):
+        return op_raw_load(TVAL, p, ofs)
+    p = rffi.cast(rffi.CArrayPtr(TVAL), llmemory.cast_ptr_to_adr(p) + ofs)
+    return p[0]
+op_gc_load_indexed.need_result_type = True
+
 def op_likely(x):
     assert isinstance(x, bool)
     return x
diff --git a/rpython/rtyper/lltypesystem/rbytearray.py b/rpython/rtyper/lltypesystem/rbytearray.py
--- a/rpython/rtyper/lltypesystem/rbytearray.py
+++ b/rpython/rtyper/lltypesystem/rbytearray.py
@@ -8,10 +8,10 @@
 def mallocbytearray(size):
     return lltype.malloc(BYTEARRAY, size)
 
-_, _, _, copy_bytearray_contents = rstr._new_copy_contents_fun(BYTEARRAY, BYTEARRAY,
+_, _, copy_bytearray_contents = rstr._new_copy_contents_fun(BYTEARRAY, BYTEARRAY,
                                                          lltype.Char,
                                                          'bytearray')
-_, _, _, copy_bytearray_contents_from_str = rstr._new_copy_contents_fun(rstr.STR,
+_, _, copy_bytearray_contents_from_str = rstr._new_copy_contents_fun(rstr.STR,
                                                                   BYTEARRAY,
                                                                   lltype.Char,
                                                                   'bytearray_from_str')
diff --git a/rpython/rtyper/lltypesystem/rstr.py b/rpython/rtyper/lltypesystem/rstr.py
--- a/rpython/rtyper/lltypesystem/rstr.py
+++ b/rpython/rtyper/lltypesystem/rstr.py
@@ -136,15 +136,13 @@
     copy_raw_to_string = func_with_new_name(copy_raw_to_string,
                                               'copy_raw_to_%s' % name)
 
-    return _get_raw_buf, copy_string_to_raw, copy_raw_to_string, copy_string_contents
+    return copy_string_to_raw, copy_raw_to_string, copy_string_contents
 
-(_get_raw_str_buf,
- copy_string_to_raw,
+(copy_string_to_raw,
  copy_raw_to_string,
  copy_string_contents) = _new_copy_contents_fun(STR, STR, Char, 'string')
 
-(_get_raw_unicode_buf,
- copy_unicode_to_raw,
+(copy_unicode_to_raw,
  copy_raw_to_unicode,
  copy_unicode_contents) = _new_copy_contents_fun(UNICODE, UNICODE, UniChar, 'unicode')
 
diff --git a/rpython/translator/c/funcgen.py b/rpython/translator/c/funcgen.py
--- a/rpython/translator/c/funcgen.py
+++ b/rpython/translator/c/funcgen.py
@@ -299,7 +299,7 @@
     def gen_op(self, op):
         macro = 'OP_%s' % op.opname.upper()
         line = None
-        if op.opname.startswith('gc_'):
+        if op.opname.startswith('gc_') and op.opname != 'gc_load_indexed':
             meth = getattr(self.gcpolicy, macro, None)
             if meth:
                 line = meth(self, op)
@@ -709,6 +709,19 @@
           "%(result)s = ((%(typename)s) (((char *)%(addr)s) + %(offset)s))[0];"
           % locals())
 
+    def OP_GC_LOAD_INDEXED(self, op):
+        addr = self.expr(op.args[0])
+        index = self.expr(op.args[1])
+        scale = self.expr(op.args[2])
+        base_ofs = self.expr(op.args[3])
+        result = self.expr(op.result)
+        TYPE = op.result.concretetype
+        typename = cdecl(self.db.gettype(TYPE).replace('@', '*@'), '')
+        return (
+          "%(result)s = ((%(typename)s) (((char *)%(addr)s) + "
+          "%(base_ofs)s + %(scale)s * %(index)s))[0];"
+          % locals())
+
     def OP_CAST_PRIMITIVE(self, op):
         TYPE = self.lltypemap(op.result)
         val =  self.expr(op.args[0])
diff --git a/rpython/translator/c/genc.py b/rpython/translator/c/genc.py
--- a/rpython/translator/c/genc.py
+++ b/rpython/translator/c/genc.py
@@ -733,6 +733,9 @@
     print >> f, 'struct pypy_threadlocal_s {'
     print >> f, '\tint ready;'
     print >> f, '\tchar *stack_end;'
+    print >> f, '\tstruct pypy_threadlocal_s *prev, *next;'
+    # note: if the four fixed fields above are changed, you need
+    # to adapt threadlocal.c's linkedlist_head declaration too
     for field in fields:
         typename = database.gettype(field.FIELDTYPE)
         print >> f, '\t%s;' % cdecl(typename, field.fieldname)
diff --git a/rpython/translator/c/src/threadlocal.c b/rpython/translator/c/src/threadlocal.c
--- a/rpython/translator/c/src/threadlocal.c
+++ b/rpython/translator/c/src/threadlocal.c
@@ -3,20 +3,42 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-#ifndef _WIN32
-# include <pthread.h>
-#endif
 #include "src/threadlocal.h"
 
 
+pthread_key_t pypy_threadlocal_key
+#ifdef _WIN32
+= TLS_OUT_OF_INDEXES
+#endif
+;
+
+static struct pypy_threadlocal_s linkedlist_head = {
+    -1,                     /* ready     */
+    NULL,                   /* stack_end */
+    &linkedlist_head,       /* prev      */
+    &linkedlist_head };     /* next      */
+
+struct pypy_threadlocal_s *
+_RPython_ThreadLocals_Enum(struct pypy_threadlocal_s *prev)
+{
+    if (prev == NULL)
+        prev = &linkedlist_head;
+    if (prev->next == &linkedlist_head)
+        return NULL;
+    return prev->next;
+}
+
 static void _RPy_ThreadLocals_Init(void *p)
 {
+    struct pypy_threadlocal_s *tls = (struct pypy_threadlocal_s *)p;
+    struct pypy_threadlocal_s *oldnext;
     memset(p, 0, sizeof(struct pypy_threadlocal_s));
+
 #ifdef RPY_TLOFS_p_errno
-    ((struct pypy_threadlocal_s *)p)->p_errno = &errno;
+    tls->p_errno = &errno;
 #endif
 #ifdef RPY_TLOFS_thread_ident
-    ((struct pypy_threadlocal_s *)p)->thread_ident =
+    tls->thread_ident =
 #    ifdef _WIN32
         GetCurrentThreadId();
 #    else
@@ -26,58 +48,70 @@
                   where it is not the case are rather old nowadays. */
 #    endif
 #endif
-    ((struct pypy_threadlocal_s *)p)->ready = 42;
+    oldnext = linkedlist_head.next;
+    tls->prev = &linkedlist_head;
+    tls->next = oldnext;
+    linkedlist_head.next = tls;
+    oldnext->prev = tls;
+    tls->ready = 42;
 }
 
+static void threadloc_unlink(void *p)   /* thread-exit cleanup of 'p' */
+{
+    struct pypy_threadlocal_s *tls = (struct pypy_threadlocal_s *)p;
+    if (tls->ready == 42) {              /* only if still in the list */
+        tls->next->prev = tls->prev;
+        tls->prev->next = tls->next;
+        memset(tls, 0xDD, sizeof(struct pypy_threadlocal_s));  /* debug */
+        tls->ready = 0;   /* must come after the memset, which clobbers it */
+    }
+#ifndef USE___THREAD
+    free(p);                             /* malloc()ed in this build */
+#endif
+}
 
-/* ------------------------------------------------------------ */
-#ifdef USE___THREAD
-/* ------------------------------------------------------------ */
-
-
-/* in this situation, we always have one full 'struct pypy_threadlocal_s'
-   available, managed by gcc. */
-__thread struct pypy_threadlocal_s pypy_threadlocal;
+#ifdef _WIN32
+/* xxx Defines a DllMain() function.  It's horrible imho: it only
+   works if we happen to compile a DLL (not a EXE); and of course you
+   get link-time errors if two files in the same DLL do the same.
+   There are some alternatives known, but they are horrible in other
+   ways (e.g. using undocumented behavior).  This seems to be the
+   simplest, but feel free to fix if you need that.
+ */
+BOOL WINAPI DllMain(HINSTANCE hinstDLL,
+                    DWORD     reason_for_call,
+                    LPVOID    reserved)
+{
+    LPVOID p;
+    switch (reason_for_call) {
+    case DLL_THREAD_DETACH:
+        if (pypy_threadlocal_key != TLS_OUT_OF_INDEXES) {
+            p = TlsGetValue(pypy_threadlocal_key);
+            if (p != NULL) {
+                TlsSetValue(pypy_threadlocal_key, NULL);
+                threadloc_unlink(p);
+            }
+        }
+        break;
+    default:
+        break;
+    }
+    return TRUE;
+}
+#endif
 
 void RPython_ThreadLocals_ProgramInit(void)
 {
-    _RPy_ThreadLocals_Init(&pypy_threadlocal);
-}
-
-char *_RPython_ThreadLocals_Build(void)
-{
-    RPyAssert(pypy_threadlocal.ready == 0, "corrupted thread-local");
-    _RPy_ThreadLocals_Init(&pypy_threadlocal);
-    return (char *)&pypy_threadlocal;
-}
-
-void RPython_ThreadLocals_ThreadDie(void)
-{
-    memset(&pypy_threadlocal, 0xDD,
-           sizeof(struct pypy_threadlocal_s));  /* debug */
-    pypy_threadlocal.ready = 0;
-}
-
-
-/* ------------------------------------------------------------ */
-#else
-/* ------------------------------------------------------------ */
-
-
-/* this is the case where the 'struct pypy_threadlocal_s' is allocated
-   explicitly, with malloc()/free(), and attached to (a single) thread-
-   local key using the API of Windows or pthread. */
-
-pthread_key_t pypy_threadlocal_key;
-
-
-void RPython_ThreadLocals_ProgramInit(void)
-{
+    /* Initialize the pypy_threadlocal_key, together with a destructor
+       that will be called every time a thread shuts down (if there is
+       a non-null thread-local value).  This is needed even in the
+       case where we use '__thread' below, for the destructor.
+    */
 #ifdef _WIN32
     pypy_threadlocal_key = TlsAlloc();
     if (pypy_threadlocal_key == TLS_OUT_OF_INDEXES)
 #else
-    if (pthread_key_create(&pypy_threadlocal_key, NULL) != 0)
+    if (pthread_key_create(&pypy_threadlocal_key, threadloc_unlink) != 0)
 #endif
     {
         fprintf(stderr, "Internal RPython error: "
@@ -87,6 +121,45 @@
     _RPython_ThreadLocals_Build();
 }
 
+
+/* ------------------------------------------------------------ */
+#ifdef USE___THREAD
+/* ------------------------------------------------------------ */
+
+
+/* in this situation, we always have one full 'struct pypy_threadlocal_s'
+   available, managed by gcc. */
+__thread struct pypy_threadlocal_s pypy_threadlocal;
+
+char *_RPython_ThreadLocals_Build(void)
+{
+    RPyAssert(pypy_threadlocal.ready == 0, "corrupted thread-local");
+    _RPy_ThreadLocals_Init(&pypy_threadlocal);
+
+    /* we also set up &pypy_threadlocal as a POSIX thread-local variable,
+       because we need the destructor behavior. */
+    pthread_setspecific(pypy_threadlocal_key, (void *)&pypy_threadlocal);
+
+    return (char *)&pypy_threadlocal;
+}
+
+void RPython_ThreadLocals_ThreadDie(void)
+{
+    pthread_setspecific(pypy_threadlocal_key, NULL);
+    threadloc_unlink(&pypy_threadlocal);
+}
+
+
+/* ------------------------------------------------------------ */
+#else
+/* ------------------------------------------------------------ */
+
+
+/* this is the case where the 'struct pypy_threadlocal_s' is allocated
+   explicitly, with malloc()/free(), and attached to (a single) thread-
+   local key using the API of Windows or pthread. */
+
+
 char *_RPython_ThreadLocals_Build(void)
 {
     void *p = malloc(sizeof(struct pypy_threadlocal_s));
@@ -105,8 +178,7 @@
     void *p = _RPy_ThreadLocals_Get();
     if (p != NULL) {
         _RPy_ThreadLocals_Set(NULL);
-        memset(p, 0xDD, sizeof(struct pypy_threadlocal_s));  /* debug */
-        free(p);
+        threadloc_unlink(p);   /* includes free(p) */
     }
 }
 
diff --git a/rpython/translator/c/src/threadlocal.h b/rpython/translator/c/src/threadlocal.h
--- a/rpython/translator/c/src/threadlocal.h
+++ b/rpython/translator/c/src/threadlocal.h
@@ -13,14 +13,18 @@
    to die. */
 RPY_EXTERN void RPython_ThreadLocals_ThreadDie(void);
 
-/* There are two llops: 'threadlocalref_addr' and 'threadlocalref_make'.
-   They both return the address of the thread-local structure (of the
-   C type 'struct pypy_threadlocal_s').  The difference is that
-   OP_THREADLOCALREF_MAKE() checks if we have initialized this thread-
-   local structure in the current thread, and if not, calls the following
-   helper. */
+/* 'threadlocalref_addr' returns the address of the thread-local
+   structure (of the C type 'struct pypy_threadlocal_s').  It first
+   checks if we have initialized this thread-local structure in the
+   current thread, and if not, calls the following helper. */
 RPY_EXTERN char *_RPython_ThreadLocals_Build(void);
 
+RPY_EXTERN struct pypy_threadlocal_s *
+_RPython_ThreadLocals_Enum(struct pypy_threadlocal_s *prev);
+
+#define OP_THREADLOCALREF_ENUM(p, r)            \
+    r = _RPython_ThreadLocals_Enum(p)
+
 
 /* ------------------------------------------------------------ */
 #ifdef USE___THREAD
diff --git a/rpython/translator/c/test/test_boehm.py b/rpython/translator/c/test/test_boehm.py
--- a/rpython/translator/c/test/test_boehm.py
+++ b/rpython/translator/c/test/test_boehm.py
@@ -23,6 +23,7 @@
 class AbstractGCTestClass(object):
     gcpolicy = "boehm"
     use_threads = False
+    extra_options = {}
 
     # deal with cleanups
     def setup_method(self, meth):
@@ -33,8 +34,10 @@
             #print "CLEANUP"
             self._cleanups.pop()()
 
-    def getcompiled(self, func, argstypelist=[], annotatorpolicy=None):
-        return compile(func, argstypelist, gcpolicy=self.gcpolicy, thread=self.use_threads)
+    def getcompiled(self, func, argstypelist=[], annotatorpolicy=None,
+                    extra_options={}):
+        return compile(func, argstypelist, gcpolicy=self.gcpolicy,
+                       thread=self.use_threads, **extra_options)
 
 
 class TestUsingBoehm(AbstractGCTestClass):
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to