Author: Ronan Lamy <ronan.l...@gmail.com>
Branch: testing-cleanup
Changeset: r85299:5181a00cb020
Date: 2016-06-21 04:17 +0100
http://bitbucket.org/pypy/pypy/changeset/5181a00cb020/

Log:    hg merge default

diff too long, truncating to 2000 out of 2387 lines

diff --git a/.hgtags b/.hgtags
--- a/.hgtags
+++ b/.hgtags
@@ -26,3 +26,4 @@
 40497617ae91caa1a394d8be6f9cd2de31cb0628 release-pypy3.3-v5.2
 40497617ae91caa1a394d8be6f9cd2de31cb0628 release-pypy3.3-v5.2
 c09c19272c990a0611b17569a0085ad1ab00c8ff release-pypy2.7-v5.3
+7e8df3df96417c16c2d55b41352ec82c9c69c978 release-pypy2.7-v5.3.1
diff --git a/lib_pypy/greenlet.egg-info b/lib_pypy/greenlet.egg-info
--- a/lib_pypy/greenlet.egg-info
+++ b/lib_pypy/greenlet.egg-info
@@ -1,6 +1,6 @@
 Metadata-Version: 1.0
 Name: greenlet
-Version: 0.4.9
+Version: 0.4.10
 Summary: Lightweight in-process concurrent programming
 Home-page: https://github.com/python-greenlet/greenlet
 Author: Ralf Schmitt (for CPython), PyPy team
diff --git a/lib_pypy/greenlet.py b/lib_pypy/greenlet.py
--- a/lib_pypy/greenlet.py
+++ b/lib_pypy/greenlet.py
@@ -1,7 +1,7 @@
 import sys
 import _continuation
 
-__version__ = "0.4.9"
+__version__ = "0.4.10"
 
 # ____________________________________________________________
 # Exceptions
diff --git a/pypy/doc/cpython_differences.rst b/pypy/doc/cpython_differences.rst
--- a/pypy/doc/cpython_differences.rst
+++ b/pypy/doc/cpython_differences.rst
@@ -315,13 +315,28 @@
 
  - ``complex``
 
+ - ``str`` (empty or single-character strings only)
+
+ - ``unicode`` (empty or single-character strings only)
+
+ - ``tuple`` (empty tuples only)
+
+ - ``frozenset`` (empty frozenset only)
+
 This change requires some changes to ``id`` as well. ``id`` fulfills the
 following condition: ``x is y <=> id(x) == id(y)``. Therefore ``id`` of the
 above types will return a value that is computed from the argument, and can
 thus be larger than ``sys.maxint`` (i.e. it can be an arbitrary long).
 
-Notably missing from the list above are ``str`` and ``unicode``.  If your
-code relies on comparing strings with ``is``, then it might break in PyPy.
+Note that strings of length 2 or greater can be equal without being
+identical.  Similarly, ``x is (2,)`` is not necessarily true even if
+``x`` contains a tuple and ``x == (2,)``.  The uniqueness rules apply
+only to the particular cases described above.  The ``str``, ``unicode``,
+``tuple`` and ``frozenset`` rules were added in PyPy 5.4; before that, a
+test like ``if x is "?"`` or ``if x is ()`` could fail even if ``x`` was
+equal to ``"?"`` or ``()``.  The new behavior added in PyPy 5.4 is
+closer to CPython's, which caches precisely the empty tuple/frozenset,
+and (generally but not always) the strings and unicodes of length <= 1.
 
 Note that for floats there "``is``" only one object per "bit pattern"
 of the float.  So ``float('nan') is float('nan')`` is true on PyPy,
diff --git a/pypy/doc/index-of-release-notes.rst 
b/pypy/doc/index-of-release-notes.rst
--- a/pypy/doc/index-of-release-notes.rst
+++ b/pypy/doc/index-of-release-notes.rst
@@ -6,6 +6,7 @@
 
 .. toctree::
 
+   release-pypy2.7-v5.3.1.rst
    release-pypy2.7-v5.3.0.rst
    release-5.1.1.rst
    release-5.1.0.rst
diff --git a/pypy/doc/index-of-whatsnew.rst b/pypy/doc/index-of-whatsnew.rst
--- a/pypy/doc/index-of-whatsnew.rst
+++ b/pypy/doc/index-of-whatsnew.rst
@@ -7,6 +7,7 @@
 .. toctree::
 
    whatsnew-head.rst
+   whatsnew-pypy2-5.3.1.rst
    whatsnew-pypy2-5.3.0.rst
    whatsnew-5.1.0.rst
    whatsnew-5.0.0.rst
diff --git a/pypy/doc/release-pypy2.7-v5.3.1.rst 
b/pypy/doc/release-pypy2.7-v5.3.1.rst
new file mode 100644
--- /dev/null
+++ b/pypy/doc/release-pypy2.7-v5.3.1.rst
@@ -0,0 +1,41 @@
+==========
+PyPy 5.3.1
+==========
+
+We have released a bugfix for PyPy2.7-v5.3.0, released last week,
+due to issues_ reported by users.
+
+Thanks to those who reported the issues.
+
+.. _issues: http://doc.pypy.org/en/latest/whatsnew-pypy2-5.3.1.html
+
+What is PyPy?
+=============
+
+PyPy is a very compliant Python interpreter, almost a drop-in replacement for
+CPython 2.7. It's fast (`PyPy and CPython 2.7.x`_ performance comparison)
+due to its integrated tracing JIT compiler.
+
+We also welcome developers of other
+`dynamic languages`_ to see what RPython can do for them.
+
+This release supports:
+
+  * **x86** machines on most common operating systems
+    (Linux 32/64, Mac OS X 64, Windows 32, OpenBSD, FreeBSD),
+
+  * newer **ARM** hardware (ARMv6 or ARMv7, with VFPv3) running Linux,
+
+  * big- and little-endian variants of **PPC64** running Linux,
+
+  * **s390x** running Linux
+
+.. _`PyPy and CPython 2.7.x`: http://speed.pypy.org
+.. _`dynamic languages`: http://pypyjs.org
+
+Please update, and continue to help us make PyPy better.
+
+Cheers
+
+The PyPy Team
+
diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst
--- a/pypy/doc/whatsnew-head.rst
+++ b/pypy/doc/whatsnew-head.rst
@@ -1,10 +1,17 @@
-=========================
+==========================
 What's new in PyPy2.7 5.3+
-=========================
+==========================
 
 .. this is a revision shortly after release-pypy2.7-v5.3
 .. startrev: 873218a739f1
 
+.. 418b05f95db5
+Improve CPython compatibility for ``is``. Now code like ``if x is ():``
+works the same way as it does on CPython.  See 
http://pypy.readthedocs.io/en/latest/cpython_differences.html#object-identity-of-primitive-values-is-and-id
 .
+
+.. pull request #455
+Add sys.{get,set}dlopenflags, for cpyext extensions.
+
 .. branch: fix-gen-dfa
 
 Resolves an issue with the generator script to build the dfa for Python syntax.
@@ -22,3 +29,16 @@
 
 .. branch: incminimark-ll_assert
 .. branch: vmprof-openbsd
+
+.. branch: testing-cleanup
+
+Simplify handling of interp-level tests and make it more forward-
+compatible.
+
+.. branch: pyfile-tell
+Sync w_file with the c-level FILE* before returning FILE* in PyFile_AsFile
+
+.. branch: rw-PyString_AS_STRING
+Allow rw access to the char* returned from PyString_AS_STRING, also refactor
+PyStringObject to look like cpython's and allow subclassing PyString_Type and
+PyUnicode_Type
diff --git a/pypy/doc/whatsnew-pypy2-5.3.1.rst 
b/pypy/doc/whatsnew-pypy2-5.3.1.rst
new file mode 100644
--- /dev/null
+++ b/pypy/doc/whatsnew-pypy2-5.3.1.rst
@@ -0,0 +1,15 @@
+===========================
+What's new in PyPy2.7 5.3.1
+===========================
+
+.. this is a revision shortly after release-pypy2.7-v5.3.0
+.. startrev: f4d726d1a010
+
+
+A bug-fix release, merging these changes:
+
+  * Add include guards to pymem.h, fixes issue #2321
+
+  * Make vmprof build on OpenBSD, from pull request #456
+
+  * Fix ``bytearray('').replace('a', 'ab')``, issue #2324
diff --git a/pypy/module/cpyext/api.py b/pypy/module/cpyext/api.py
--- a/pypy/module/cpyext/api.py
+++ b/pypy/module/cpyext/api.py
@@ -1210,8 +1210,6 @@
         cpyext_type_init = self.cpyext_type_init
         self.cpyext_type_init = None
         for pto, w_type in cpyext_type_init:
-            if space.is_w(w_type, space.w_str):
-                pto.c_tp_itemsize = 1
             finish_type_1(space, pto)
             finish_type_2(space, pto, w_type)
 
@@ -1520,7 +1518,7 @@
     try:
         ll_libname = rffi.str2charp(path)
         try:
-            dll = rdynload.dlopen(ll_libname)
+            dll = rdynload.dlopen(ll_libname, space.sys.dlopenflags)
         finally:
             lltype.free(ll_libname, flavor='raw')
     except rdynload.DLOpenError as e:
diff --git a/pypy/module/cpyext/bytesobject.py 
b/pypy/module/cpyext/bytesobject.py
--- a/pypy/module/cpyext/bytesobject.py
+++ b/pypy/module/cpyext/bytesobject.py
@@ -6,7 +6,9 @@
 from pypy.module.cpyext.pyerrors import PyErr_BadArgument
 from pypy.module.cpyext.pyobject import (
     PyObject, PyObjectP, Py_DecRef, make_ref, from_ref, track_reference,
-    make_typedescr, get_typedescr, as_pyobj, Py_IncRef, get_w_obj_and_decref)
+    make_typedescr, get_typedescr, as_pyobj, Py_IncRef, get_w_obj_and_decref,
+    pyobj_has_w_obj)
+from pypy.objspace.std.bytesobject import W_BytesObject
 
 ##
 ## Implementation of PyStringObject
@@ -16,7 +18,7 @@
 ## -----------
 ##
 ## PyString_AsString() must return a (non-movable) pointer to the underlying
-## buffer, whereas pypy strings are movable.  C code may temporarily store
+## ob_sval, whereas pypy strings are movable.  C code may temporarily store
 ## this address and use it, as long as it owns a reference to the PyObject.
 ## There is no "release" function to specify that the pointer is not needed
 ## any more.
@@ -28,7 +30,7 @@
 ## --------
 ##
 ## PyStringObject contains two additional members: the ob_size and a pointer 
to a
-## char buffer; it may be NULL.
+## char ob_sval; it may be NULL.
 ##
 ## - A string allocated by pypy will be converted into a PyStringObject with a
 ##   NULL buffer.  The first time PyString_AsString() is called, memory is
@@ -41,6 +43,9 @@
 ##   the pypy string is created, and added to the global map of tracked
 ##   objects.  The buffer is then supposed to be immutable.
 ##
+##-  A buffer obtained from PyString_AS_STRING() could be mutable iff
+##   there is no corresponding pypy object for the string
+##
 ## - _PyString_Resize() works only on not-yet-pypy'd strings, and returns a
 ##   similar object.
 ##
@@ -53,7 +58,7 @@
 PyStringObjectStruct = lltype.ForwardReference()
 PyStringObject = lltype.Ptr(PyStringObjectStruct)
 PyStringObjectFields = PyVarObjectFields + \
-    (("ob_shash", rffi.LONG), ("ob_sstate", rffi.INT), ("buffer", rffi.CCHARP))
+    (("ob_shash", rffi.LONG), ("ob_sstate", rffi.INT), ("ob_sval", 
rffi.CArray(lltype.Char)))
 cpython_struct("PyStringObject", PyStringObjectFields, PyStringObjectStruct)
 
 @bootstrap_function
@@ -69,44 +74,43 @@
 
 def new_empty_str(space, length):
     """
-    Allocate a PyStringObject and its buffer, but without a corresponding
-    interpreter object.  The buffer may be mutated, until string_realize() is
+    Allocate a PyStringObject and its ob_sval, but without a corresponding
+    interpreter object.  The ob_sval may be mutated, until string_realize() is
     called.  Refcount of the result is 1.
     """
     typedescr = get_typedescr(space.w_str.layout.typedef)
-    py_obj = typedescr.allocate(space, space.w_str)
+    py_obj = typedescr.allocate(space, space.w_str, length)
     py_str = rffi.cast(PyStringObject, py_obj)
-
-    buflen = length + 1
-    py_str.c_ob_size = length
-    py_str.c_buffer = lltype.malloc(rffi.CCHARP.TO, buflen,
-                                    flavor='raw', zero=True,
-                                    add_memory_pressure=True)
+    py_str.c_ob_shash = -1
     py_str.c_ob_sstate = rffi.cast(rffi.INT, 0) # SSTATE_NOT_INTERNED
     return py_str
 
 def string_attach(space, py_obj, w_obj):
     """
-    Fills a newly allocated PyStringObject with the given string object. The
-    buffer must not be modified.
+    Copy RPython string object contents to a PyStringObject. The
+    c_ob_sval must not be modified.
     """
     py_str = rffi.cast(PyStringObject, py_obj)
-    py_str.c_ob_size = len(space.str_w(w_obj))
-    py_str.c_buffer = lltype.nullptr(rffi.CCHARP.TO)
+    s = space.str_w(w_obj)
+    if py_str.c_ob_size  < len(s):
+        raise oefmt(space.w_ValueError,
+            "string_attach called on object with ob_size %d but trying to 
store %d",
+            py_str.c_ob_size, len(s)) 
+    rffi.c_memcpy(py_str.c_ob_sval, rffi.str2charp(s), len(s))
+    py_str.c_ob_sval[len(s)] = '\0'
     py_str.c_ob_shash = space.hash_w(w_obj)
     py_str.c_ob_sstate = rffi.cast(rffi.INT, 1) # SSTATE_INTERNED_MORTAL
 
 def string_realize(space, py_obj):
     """
-    Creates the string in the interpreter. The PyStringObject buffer must not
+    Creates the string in the interpreter. The PyStringObject ob_sval must not
     be modified after this call.
     """
     py_str = rffi.cast(PyStringObject, py_obj)
-    if not py_str.c_buffer:
-        py_str.c_buffer = lltype.malloc(rffi.CCHARP.TO, py_str.c_ob_size + 1,
-                                    flavor='raw', zero=True)
-    s = rffi.charpsize2str(py_str.c_buffer, py_str.c_ob_size)
-    w_obj = space.wrap(s)
+    s = rffi.charpsize2str(py_str.c_ob_sval, py_str.c_ob_size)
+    w_type = from_ref(space, rffi.cast(PyObject, py_obj.c_ob_type))
+    w_obj = space.allocate_instance(W_BytesObject, w_type)
+    w_obj.__init__(s)
     py_str.c_ob_shash = space.hash_w(w_obj)
     py_str.c_ob_sstate = rffi.cast(rffi.INT, 1) # SSTATE_INTERNED_MORTAL
     track_reference(space, py_obj, w_obj)
@@ -116,9 +120,6 @@
 def string_dealloc(space, py_obj):
     """Frees allocated PyStringObject resources.
     """
-    py_str = rffi.cast(PyStringObject, py_obj)
-    if py_str.c_buffer:
-        lltype.free(py_str.c_buffer, flavor="raw")
     from pypy.module.cpyext.object import PyObject_dealloc
     PyObject_dealloc(space, py_obj)
 
@@ -139,6 +140,9 @@
 
 @cpython_api([PyObject], rffi.CCHARP, error=0)
 def PyString_AsString(space, ref):
+    return _PyString_AsString(space, ref)
+
+def _PyString_AsString(space, ref):
     if from_ref(space, rffi.cast(PyObject, ref.c_ob_type)) is space.w_str:
         pass    # typecheck returned "ok" without forcing 'ref' at all
     elif not PyString_Check(space, ref):   # otherwise, use the alternate way
@@ -151,15 +155,23 @@
                         "expected string or Unicode object, %T found",
                         from_ref(space, ref))
     ref_str = rffi.cast(PyStringObject, ref)
-    if not ref_str.c_buffer:
-        # copy string buffer
-        w_str = from_ref(space, ref)
-        s = space.str_w(w_str)
-        ref_str.c_buffer = rffi.str2charp(s)
-    return ref_str.c_buffer
+    if not pyobj_has_w_obj(ref):
+        # XXX Force the ref?
+        string_realize(space, ref)
+    return ref_str.c_ob_sval
+
+@cpython_api([rffi.VOIDP], rffi.CCHARP, error=0)
+def PyString_AS_STRING(space, void_ref):
+    ref = rffi.cast(PyObject, void_ref)
+    # if no w_str is associated with this ref,
+    # return the c-level ptr as RW
+    if not pyobj_has_w_obj(ref):
+        py_str = rffi.cast(PyStringObject, ref)
+        return py_str.c_ob_sval
+    return _PyString_AsString(space, ref)
 
 @cpython_api([PyObject, rffi.CCHARPP, rffi.CArrayPtr(Py_ssize_t)], 
rffi.INT_real, error=-1)
-def PyString_AsStringAndSize(space, ref, buffer, length):
+def PyString_AsStringAndSize(space, ref, data, length):
     if not PyString_Check(space, ref):
         from pypy.module.cpyext.unicodeobject import (
             PyUnicode_Check, _PyUnicode_AsDefaultEncodedString)
@@ -169,18 +181,16 @@
             raise oefmt(space.w_TypeError,
                         "expected string or Unicode object, %T found",
                         from_ref(space, ref))
+    if not pyobj_has_w_obj(ref):
+        # force the ref
+        string_realize(space, ref)
     ref_str = rffi.cast(PyStringObject, ref)
-    if not ref_str.c_buffer:
-        # copy string buffer
-        w_str = from_ref(space, ref)
-        s = space.str_w(w_str)
-        ref_str.c_buffer = rffi.str2charp(s)
-    buffer[0] = ref_str.c_buffer
+    data[0] = ref_str.c_ob_sval
     if length:
         length[0] = ref_str.c_ob_size
     else:
         i = 0
-        while ref_str.c_buffer[i] != '\0':
+        while ref_str.c_ob_sval[i] != '\0':
             i += 1
         if i != ref_str.c_ob_size:
             raise oefmt(space.w_TypeError,
@@ -209,10 +219,10 @@
     set to NULL, a memory exception is set, and -1 is returned.
     """
     # XXX always create a new string so far
-    py_str = rffi.cast(PyStringObject, ref[0])
-    if not py_str.c_buffer:
+    if pyobj_has_w_obj(ref[0]):
         raise oefmt(space.w_SystemError,
                     "_PyString_Resize called on already created string")
+    py_str = rffi.cast(PyStringObject, ref[0])
     try:
         py_newstr = new_empty_str(space, newsize)
     except MemoryError:
@@ -224,7 +234,7 @@
     if oldsize < newsize:
         to_cp = oldsize
     for i in range(to_cp):
-        py_newstr.c_buffer[i] = py_str.c_buffer[i]
+        py_newstr.c_ob_sval[i] = py_str.c_ob_sval[i]
     Py_DecRef(space, ref[0])
     ref[0] = rffi.cast(PyObject, py_newstr)
     return 0
diff --git a/pypy/module/cpyext/include/patchlevel.h 
b/pypy/module/cpyext/include/patchlevel.h
--- a/pypy/module/cpyext/include/patchlevel.h
+++ b/pypy/module/cpyext/include/patchlevel.h
@@ -29,8 +29,8 @@
 #define PY_VERSION             "2.7.10"
 
 /* PyPy version as a string */
-#define PYPY_VERSION "5.3.1-alpha0"
-#define PYPY_VERSION_NUM  0x05030100
+#define PYPY_VERSION "5.3.2-alpha0"
+#define PYPY_VERSION_NUM  0x05030200
 
 /* Defined to mean a PyPy where cpyext holds more regular references
    to PyObjects, e.g. staying alive as long as the internal PyPy object
diff --git a/pypy/module/cpyext/include/stringobject.h 
b/pypy/module/cpyext/include/stringobject.h
--- a/pypy/module/cpyext/include/stringobject.h
+++ b/pypy/module/cpyext/include/stringobject.h
@@ -10,7 +10,6 @@
 #include <stdarg.h>
 
 #define PyString_GET_SIZE(op) PyString_Size((PyObject*)(op))
-#define PyString_AS_STRING(op) PyString_AsString((PyObject*)(op))
 /*
 Type PyStringObject represents a character string.  An extra zero byte is
 reserved at the end to ensure it is zero-terminated, but a size is
@@ -41,12 +40,11 @@
     PyObject_VAR_HEAD
     long ob_shash;
     int ob_sstate;
-    char * buffer; /* change the name from cpython so all non-api c access is 
thwarted */
+    char ob_sval[1]; 
 
     /* Invariants 
-     * (not relevant in PyPy, all stringobjects are backed by a pypy object)
-     *     buffer contains space for 'ob_size+1' elements.
-     *     buffer[ob_size] == 0.
+     *     ob_sval contains space for 'ob_size+1' elements.
+     *     ob_sval[ob_size] == 0.
      *     ob_shash is the hash of the string or -1 if not computed yet.
      *     ob_sstate != 0 iff the string object is in stringobject.c's
      *       'interned' dictionary; in this case the two references
diff --git a/pypy/module/cpyext/pyfile.py b/pypy/module/cpyext/pyfile.py
--- a/pypy/module/cpyext/pyfile.py
+++ b/pypy/module/cpyext/pyfile.py
@@ -55,6 +55,7 @@
     if not PyFile_Check(space, w_p):
         raise oefmt(space.w_IOError, 'first argument must be an open file')
     assert isinstance(w_p, W_File)
+    w_p.stream.flush_buffers()
     try:
         fd = space.int_w(space.call_method(w_p, 'fileno'))
         mode = w_p.mode
diff --git a/pypy/module/cpyext/pyobject.py b/pypy/module/cpyext/pyobject.py
--- a/pypy/module/cpyext/pyobject.py
+++ b/pypy/module/cpyext/pyobject.py
@@ -7,7 +7,7 @@
 from pypy.module.cpyext.api import (
     cpython_api, bootstrap_function, PyObject, PyObjectP, ADDR,
     CANNOT_FAIL, Py_TPFLAGS_HEAPTYPE, PyTypeObjectPtr, is_PyObject,
-    INTERPLEVEL_API)
+    INTERPLEVEL_API, PyVarObject)
 from pypy.module.cpyext.state import State
 from pypy.objspace.std.typeobject import W_TypeObject
 from pypy.objspace.std.objectobject import W_ObjectObject
@@ -47,13 +47,16 @@
             size = pytype.c_tp_basicsize
         else:
             size = rffi.sizeof(self.basestruct)
-        if itemcount and w_type is not space.w_str:
+        if pytype.c_tp_itemsize:
             size += itemcount * pytype.c_tp_itemsize
         assert size >= rffi.sizeof(PyObject.TO)
         buf = lltype.malloc(rffi.VOIDP.TO, size,
                             flavor='raw', zero=True,
                             add_memory_pressure=True)
         pyobj = rffi.cast(PyObject, buf)
+        if pytype.c_tp_itemsize:
+            pyvarobj = rffi.cast(PyVarObject, pyobj)
+            pyvarobj.c_ob_size = itemcount
         pyobj.c_ob_refcnt = 1
         #pyobj.c_ob_pypy_link should get assigned very quickly
         pyobj.c_ob_type = pytype
@@ -152,13 +155,18 @@
 class InvalidPointerException(Exception):
     pass
 
-def create_ref(space, w_obj, itemcount=0):
+def create_ref(space, w_obj):
     """
     Allocates a PyObject, and fills its fields with info from the given
     interpreter object.
     """
     w_type = space.type(w_obj)
+    pytype = rffi.cast(PyTypeObjectPtr, as_pyobj(space, w_type))
     typedescr = get_typedescr(w_obj.typedef)
+    if pytype.c_tp_itemsize != 0:
+        itemcount = space.len_w(w_obj) # PyStringObject and subclasses
+    else:
+        itemcount = 0
     py_obj = typedescr.allocate(space, w_type, itemcount=itemcount)
     track_reference(space, py_obj, w_obj)
     #
diff --git a/pypy/module/cpyext/src/stringobject.c 
b/pypy/module/cpyext/src/stringobject.c
--- a/pypy/module/cpyext/src/stringobject.c
+++ b/pypy/module/cpyext/src/stringobject.c
@@ -107,7 +107,7 @@
     if (!string)
         return NULL;
 
-    s = PyString_AsString(string);
+    s = PyString_AS_STRING(string);
 
     for (f = format; *f; f++) {
         if (*f == '%') {
diff --git a/pypy/module/cpyext/test/test_bytearrayobject.py 
b/pypy/module/cpyext/test/test_bytearrayobject.py
--- a/pypy/module/cpyext/test/test_bytearrayobject.py
+++ b/pypy/module/cpyext/test/test_bytearrayobject.py
@@ -1,5 +1,6 @@
 from pypy.module.cpyext.test.test_cpyext import AppTestCpythonExtensionBase
 
+
 class AppTestStringObject(AppTestCpythonExtensionBase):
     def test_basic(self):
         module = self.import_extension('foo', [
@@ -113,6 +114,7 @@
         assert s == 'test'
 
     def test_manipulations(self):
+        import sys
         module = self.import_extension('foo', [
             ("bytearray_from_string", "METH_VARARGS",
              '''
@@ -155,7 +157,9 @@
              """)])
         assert module.bytearray_from_string("huheduwe") == "huhe"
         assert module.str_from_bytearray(bytearray('abc')) == 'abc'
-        raises(ValueError, module.str_from_bytearray, 4.0)
+        if '__pypy__' in sys.builtin_module_names:
+            # CPython only makes an assert.
+            raises(ValueError, module.str_from_bytearray, 4.0)
         ret = module.concat('abc', 'def')
         assert ret == 'abcdef'
         assert not isinstance(ret, str)
diff --git a/pypy/module/cpyext/test/test_bytesobject.py 
b/pypy/module/cpyext/test/test_bytesobject.py
--- a/pypy/module/cpyext/test/test_bytesobject.py
+++ b/pypy/module/cpyext/test/test_bytesobject.py
@@ -25,14 +25,13 @@
             ("test_Size", "METH_NOARGS",
              """
                  PyObject* s = PyString_FromString("Hello world");
-                 int result = 0;
+                 int result;
                  size_t expected_size;
 
-                 if(PyString_Size(s) == 11) {
-                     result = 1;
-                 }
+                 result = PyString_Size(s);
+                
                  #ifdef PYPY_VERSION
-                    expected_size = sizeof(void*)*7;
+                    expected_size = 48;
                  #elif defined Py_DEBUG
                     expected_size = 53;
                  #else
@@ -44,7 +43,7 @@
                      result = 0;
                  }
                  Py_DECREF(s);
-                 return PyBool_FromLong(result);
+                 return PyLong_FromLong(result);
              """),
             ("test_Size_exception", "METH_NOARGS",
              """
@@ -60,7 +59,7 @@
              """)], prologue='#include <stdlib.h>')
         assert module.get_hello1() == 'Hello world'
         assert module.get_hello2() == 'Hello world'
-        assert module.test_Size()
+        assert module.test_Size() == 11
         raises(TypeError, module.test_Size_exception)
 
         assert module.test_is_string("")
@@ -80,7 +79,7 @@
                  if (t == NULL)
                     return NULL;
                  Py_DECREF(t);
-                 c = PyString_AsString(s);
+                 c = PyString_AS_STRING(s);
                  c[0] = 'a';
                  c[1] = 'b';
                  c[2] = 0;
@@ -108,14 +107,23 @@
                 obj = (PyStringObject*)type->tp_alloc(type, 10);
                 if (PyString_GET_SIZE(obj) != 10)
                     return PyLong_FromLong(PyString_GET_SIZE(obj));
-                /* cannot work, there is only RO access
-                memcpy(PyString_AS_STRING(obj), "works", 6); */
+                /* cannot work, there is only RO access */
+                memcpy(PyString_AS_STRING(obj), "works", 6);
                 Py_INCREF(obj);
                 return (PyObject*)obj;
              """),
+            ('alloc_rw', "METH_NOARGS",
+             '''
+                PyObject *obj = _PyObject_NewVar(&PyString_Type, 10);
+                char * buf = PyString_AS_STRING(obj);
+                memcpy(PyString_AS_STRING(obj), "works", 6);
+                return (PyObject*)obj;
+             '''),
             ])
+        s = module.alloc_rw()
+        assert s == 'works' + '\x00' * 5
         s = module.tpalloc()
-        assert s == '\x00' * 10
+        assert s == 'works' + '\x00' * 5
 
     def test_AsString(self):
         module = self.import_extension('foo', [
@@ -330,27 +338,127 @@
         # doesn't really test, but if printf is enabled will prove sstate
         assert module.test_sstate()
 
+    def test_subclass(self):
+        # taken from PyStringArrType_Type in numpy's scalartypes.c.src
+        module = self.import_extension('bar', [
+            ("newsubstr", "METH_O",
+             """
+                PyObject * obj;
+                char * data;
+                int len;
+                PyType_Ready(&PyStringArrType_Type);
+                
+                data = PyString_AS_STRING(args);
+                len = PyString_GET_SIZE(args);
+                if (data == NULL || len < 1)
+                    Py_RETURN_NONE;
+                obj = PyArray_Scalar(data, len);
+                return obj;
+             """),
+            ], prologue="""
+                #include <Python.h>
+                PyTypeObject PyStringArrType_Type = {
+                    PyObject_HEAD_INIT(NULL)
+                    0,                            /* ob_size */
+                    "bar.string_",                /* tp_name*/
+                    sizeof(PyStringObject), /* tp_basicsize*/
+                    0                             /* tp_itemsize */
+                    };
+
+                    static PyObject *
+                    stringtype_repr(PyObject *self)
+                    {
+                        const char *dptr, *ip;
+                        int len;
+                        PyObject *new;
+                        PyObject *ret;
+
+                        ip = dptr = PyString_AS_STRING(self);
+                        len = PyString_GET_SIZE(self);
+                        dptr += len-1;
+                        while(len > 0 && *dptr-- == 0) {
+                            len--;
+                        }
+                        new = PyString_FromStringAndSize(ip, len);
+                        if (new == NULL) {
+                            return PyString_FromString("");
+                        }
+                        return new;
+                    }
+
+                    static PyObject *
+                    stringtype_str(PyObject *self)
+                    {
+                        const char *dptr, *ip;
+                        int len;
+                        PyObject *new;
+                        PyObject *ret;
+
+                        ip = dptr = PyString_AS_STRING(self);
+                        len = PyString_GET_SIZE(self);
+                        dptr += len-1;
+                        while(len > 0 && *dptr-- == 0) {
+                            len--;
+                        }
+                        new = PyString_FromStringAndSize(ip, len);
+                        if (new == NULL) {
+                            return PyString_FromString("");
+                        }
+                        return new;
+                    }
+
+                    PyObject *
+                    PyArray_Scalar(char *data, int n)
+                    {
+                        PyTypeObject *type = &PyStringArrType_Type;
+                        PyObject *obj;
+                        void *destptr;
+                        int type_num;
+                        int itemsize = n;
+                        obj = type->tp_alloc(type, itemsize);
+                        if (obj == NULL) {
+                            return NULL;
+                        }
+                        destptr = PyString_AS_STRING(obj);
+                        ((PyStringObject *)obj)->ob_shash = -1;
+                        memcpy(destptr, data, itemsize);
+                        return obj;
+                    }
+            """, more_init = '''
+                PyStringArrType_Type.tp_alloc = NULL;
+                PyStringArrType_Type.tp_free = NULL;
+
+                PyStringArrType_Type.tp_repr = stringtype_repr;
+                PyStringArrType_Type.tp_str = stringtype_str;
+                PyStringArrType_Type.tp_flags = 
Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE;
+                PyStringArrType_Type.tp_itemsize = sizeof(char);
+                PyStringArrType_Type.tp_base = &PyString_Type;
+            ''')
+
+        a = module.newsubstr('abc')
+        assert type(a).__name__ == 'string_'
+        assert a == 'abc'
 
 class TestString(BaseApiTest):
     def test_string_resize(self, space, api):
         py_str = new_empty_str(space, 10)
         ar = lltype.malloc(PyObjectP.TO, 1, flavor='raw')
-        py_str.c_buffer[0] = 'a'
-        py_str.c_buffer[1] = 'b'
-        py_str.c_buffer[2] = 'c'
+        py_str.c_ob_sval[0] = 'a'
+        py_str.c_ob_sval[1] = 'b'
+        py_str.c_ob_sval[2] = 'c'
         ar[0] = rffi.cast(PyObject, py_str)
         api._PyString_Resize(ar, 3)
         py_str = rffi.cast(PyStringObject, ar[0])
         assert py_str.c_ob_size == 3
-        assert py_str.c_buffer[1] == 'b'
-        assert py_str.c_buffer[3] == '\x00'
+        assert py_str.c_ob_sval[1] == 'b'
+        assert py_str.c_ob_sval[3] == '\x00'
         # the same for growing
         ar[0] = rffi.cast(PyObject, py_str)
         api._PyString_Resize(ar, 10)
         py_str = rffi.cast(PyStringObject, ar[0])
         assert py_str.c_ob_size == 10
-        assert py_str.c_buffer[1] == 'b'
-        assert py_str.c_buffer[10] == '\x00'
+        assert py_str.c_ob_sval[1] == 'b'
+        assert py_str.c_ob_sval[10] == '\x00'
         Py_DecRef(space, ar[0])
         lltype.free(ar, flavor='raw')
 
diff --git a/pypy/module/cpyext/test/test_datetime.py 
b/pypy/module/cpyext/test/test_datetime.py
--- a/pypy/module/cpyext/test/test_datetime.py
+++ b/pypy/module/cpyext/test/test_datetime.py
@@ -142,7 +142,7 @@
                     2000, 6, 6, 6, 6, 6, 6, Py_None,
                     PyDateTimeAPI->DateTimeType);
              """),
-        ])
+        ], prologue='#include "datetime.h"\n')
         import datetime
         assert module.new_date() == datetime.date(2000, 6, 6)
         assert module.new_time() == datetime.time(6, 6, 6, 6)
@@ -241,6 +241,9 @@
                  PyObject* obj = PyDelta_FromDSU(6, 6, 6);
                  PyDateTime_Delta* delta = (PyDateTime_Delta*)obj;
 
+#if defined(PYPY_VERSION) || PY_VERSION_HEX >= 0x03030000
+                 // These macros are only defined in CPython 3.x and PyPy.
+                 // See: http://bugs.python.org/issue13727
                  PyDateTime_DELTA_GET_DAYS(obj);
                  PyDateTime_DELTA_GET_DAYS(delta);
 
@@ -249,10 +252,10 @@
 
                  PyDateTime_DELTA_GET_MICROSECONDS(obj);
                  PyDateTime_DELTA_GET_MICROSECONDS(delta);
-
+#endif
                  return obj;
              """),
-            ])
+            ], prologue='#include "datetime.h"\n')
         import datetime
         assert module.test_date_macros() == datetime.date(2000, 6, 6)
         assert module.test_datetime_macros() == datetime.datetime(
diff --git a/pypy/module/cpyext/test/test_pyfile.py 
b/pypy/module/cpyext/test/test_pyfile.py
--- a/pypy/module/cpyext/test/test_pyfile.py
+++ b/pypy/module/cpyext/test/test_pyfile.py
@@ -1,5 +1,7 @@
+from pypy.conftest import option
 from pypy.module.cpyext.api import fopen, fclose, fwrite
 from pypy.module.cpyext.test.test_api import BaseApiTest
+from pypy.module.cpyext.test.test_cpyext import AppTestCpythonExtensionBase
 from pypy.module.cpyext.object import Py_PRINT_RAW
 from rpython.rtyper.lltypesystem import rffi, lltype
 from rpython.tool.udir import udir
@@ -111,3 +113,34 @@
         out, err = capfd.readouterr()
         out = out.replace('\r\n', '\n')
         assert out == " 1 23\n"
+
+
+class AppTestPyFile(AppTestCpythonExtensionBase):
+
+    def setup_class(cls):
+        from rpython.tool.udir import udir
+        if option.runappdirect:
+            cls.w_udir = str(udir)
+        else:
+            cls.w_udir = cls.space.wrap(str(udir))
+
+    def test_file_tell(self):
+        module = self.import_extension('foo', [
+            ("get_c_tell", "METH_O",
+             """
+                FILE * fp = PyFile_AsFile(args);
+                if (fp == NULL)
+                    return PyLong_FromLong(0);
+                return PyLong_FromLong(ftell(fp));
+             """),
+            ])
+        filename = self.udir + "/_test_file"
+        with open(filename, 'w') as fid:
+            fid.write('3' * 122)
+        with open(filename, 'r') as fid:
+            s = fid.read(80)
+            t_py = fid.tell()
+            assert t_py == 80
+            t_c = module.get_c_tell(fid)
+        assert t_c == t_py
+
diff --git a/pypy/module/cpyext/test/test_typeobject.py 
b/pypy/module/cpyext/test/test_typeobject.py
--- a/pypy/module/cpyext/test/test_typeobject.py
+++ b/pypy/module/cpyext/test/test_typeobject.py
@@ -5,8 +5,6 @@
 from pypy.module.cpyext.pyobject import make_ref, from_ref
 from pypy.module.cpyext.typeobject import PyTypeObjectPtr
 
-import sys
-
 class AppTestTypeObject(AppTestCpythonExtensionBase):
     def test_typeobject(self):
         import sys
@@ -896,7 +894,7 @@
         module.footype("X", (object,), {})
 
     def test_app_subclass_of_c_type(self):
-        # on cpython, the size changes (6 bytes added)
+        import sys
         module = self.import_module(name='foo')
         size = module.size_of_instances(module.fooType)
         class f1(object):
@@ -906,7 +904,11 @@
         class bar(f1, f2):
             pass
         assert bar.__base__ is f2
-        assert module.size_of_instances(bar) == size
+        # On cpython, the size changes.
+        if '__pypy__' in sys.builtin_module_names:
+            assert module.size_of_instances(bar) == size
+        else:
+            assert module.size_of_instances(bar) >= size
 
     def test_app_cant_subclass_two_types(self):
         module = self.import_module(name='foo')
diff --git a/pypy/module/cpyext/tupleobject.py 
b/pypy/module/cpyext/tupleobject.py
--- a/pypy/module/cpyext/tupleobject.py
+++ b/pypy/module/cpyext/tupleobject.py
@@ -142,6 +142,7 @@
     ref = rffi.cast(PyTupleObject, ref)
     size = ref.c_ob_size
     if index < 0 or index >= size:
+        decref(space, py_obj)
         raise oefmt(space.w_IndexError, "tuple assignment index out of range")
     old_ref = ref.c_ob_item[index]
     ref.c_ob_item[index] = py_obj    # consumes a reference
diff --git a/pypy/module/cpyext/typeobject.py b/pypy/module/cpyext/typeobject.py
--- a/pypy/module/cpyext/typeobject.py
+++ b/pypy/module/cpyext/typeobject.py
@@ -657,6 +657,8 @@
         pto.c_tp_dealloc = llhelper(
             subtype_dealloc.api_func.functype,
             subtype_dealloc.api_func.get_wrapper(space))
+    if space.is_w(w_type, space.w_str):
+        pto.c_tp_itemsize = 1
     # buffer protocol
     setup_buffer_procs(space, w_type, pto)
 
@@ -695,6 +697,8 @@
     if pto.c_tp_base:
         if pto.c_tp_base.c_tp_basicsize > pto.c_tp_basicsize:
             pto.c_tp_basicsize = pto.c_tp_base.c_tp_basicsize
+        if pto.c_tp_itemsize < pto.c_tp_base.c_tp_itemsize:
+            pto.c_tp_itemsize = pto.c_tp_base.c_tp_itemsize
 
     # will be filled later on with the correct value
     # may not be 0
diff --git a/pypy/module/cpyext/unicodeobject.py 
b/pypy/module/cpyext/unicodeobject.py
--- a/pypy/module/cpyext/unicodeobject.py
+++ b/pypy/module/cpyext/unicodeobject.py
@@ -77,7 +77,9 @@
     """
     py_uni = rffi.cast(PyUnicodeObject, py_obj)
     s = rffi.wcharpsize2unicode(py_uni.c_str, py_uni.c_length)
-    w_obj = space.wrap(s)
+    w_type = from_ref(space, rffi.cast(PyObject, py_obj.c_ob_type))
+    w_obj = space.allocate_instance(unicodeobject.W_UnicodeObject, w_type)
+    w_obj.__init__(s)
     py_uni.c_hash = space.hash_w(w_obj)
     track_reference(space, py_obj, w_obj)
     return w_obj
diff --git a/pypy/module/pypyjit/test_pypy_c/test_micronumpy.py 
b/pypy/module/pypyjit/test_pypy_c/test_micronumpy.py
--- a/pypy/module/pypyjit/test_pypy_c/test_micronumpy.py
+++ b/pypy/module/pypyjit/test_pypy_c/test_micronumpy.py
@@ -163,14 +163,10 @@
             guard_not_invalidated(descr=...)
             i32 = float_ne(f31, 0.000000)
             guard_true(i32, descr=...)
-            i34 = getarrayitem_raw_i(#, #, descr=<ArrayU 1>)  # XXX what are 
these?
-            guard_value(i34, #, descr=...)                  # XXX don't appear 
in
-            i35 = getarrayitem_raw_i(#, #, descr=<ArrayU 1>)  # XXX equiv 
test_zjit
             i36 = int_add(i24, 1)
             i37 = int_add(i29, 8)
             i38 = int_ge(i36, i30)
             guard_false(i38, descr=...)
-            guard_value(i35, #, descr=...)                  # XXX
             jump(..., descr=...)
         """)
 
diff --git a/pypy/module/sys/__init__.py b/pypy/module/sys/__init__.py
--- a/pypy/module/sys/__init__.py
+++ b/pypy/module/sys/__init__.py
@@ -1,6 +1,7 @@
 from pypy.interpreter.mixedmodule import MixedModule
 from pypy.interpreter.error import OperationError
 from rpython.rlib.objectmodel import we_are_translated
+from rpython.rlib import rdynload
 import sys
 
 _WIN = sys.platform == 'win32'
@@ -19,6 +20,7 @@
         self.defaultencoding = "ascii"
         self.filesystemencoding = None
         self.debug = True
+        self.dlopenflags = rdynload._dlopen_default_mode()
 
     interpleveldefs = {
         '__name__'              : '(space.wrap("sys"))',
@@ -85,7 +87,9 @@
 
         'float_info'            : 'system.get_float_info(space)',
         'long_info'             : 'system.get_long_info(space)',
-        'float_repr_style'      : 'system.get_float_repr_style(space)'
+        'float_repr_style'      : 'system.get_float_repr_style(space)',
+        'getdlopenflags'        : 'system.getdlopenflags',
+        'setdlopenflags'        : 'system.setdlopenflags',
         }
 
     if sys.platform == 'win32':
diff --git a/pypy/module/sys/system.py b/pypy/module/sys/system.py
--- a/pypy/module/sys/system.py
+++ b/pypy/module/sys/system.py
@@ -58,3 +58,9 @@
 
 def get_float_repr_style(space):
     return space.wrap("short")
+
+def getdlopenflags(space):
+    return space.wrap(space.sys.dlopenflags)
+
+def setdlopenflags(space, w_flags):
+    space.sys.dlopenflags = space.int_w(w_flags)
diff --git a/pypy/module/sys/test/test_sysmodule.py 
b/pypy/module/sys/test/test_sysmodule.py
--- a/pypy/module/sys/test/test_sysmodule.py
+++ b/pypy/module/sys/test/test_sysmodule.py
@@ -445,14 +445,12 @@
 
     def test_dlopenflags(self):
         import sys
-        if hasattr(sys, "setdlopenflags"):
-            assert hasattr(sys, "getdlopenflags")
-            raises(TypeError, sys.getdlopenflags, 42)
-            oldflags = sys.getdlopenflags()
-            raises(TypeError, sys.setdlopenflags)
-            sys.setdlopenflags(oldflags+1)
-            assert sys.getdlopenflags() == oldflags+1
-            sys.setdlopenflags(oldflags)
+        raises(TypeError, sys.getdlopenflags, 42)
+        oldflags = sys.getdlopenflags()
+        raises(TypeError, sys.setdlopenflags)
+        sys.setdlopenflags(oldflags+1)
+        assert sys.getdlopenflags() == oldflags+1
+        sys.setdlopenflags(oldflags)
 
     def test_refcount(self):
         import sys
@@ -610,7 +608,7 @@
 class AppTestSysSettracePortedFromCpython(object):
     def test_sys_settrace(self):
         import sys
-        
+
         class Tracer:
             def __init__(self):
                 self.events = []
diff --git a/pypy/module/sys/version.py b/pypy/module/sys/version.py
--- a/pypy/module/sys/version.py
+++ b/pypy/module/sys/version.py
@@ -10,7 +10,7 @@
 #XXX # sync CPYTHON_VERSION with patchlevel.h, package.py
 CPYTHON_API_VERSION        = 1013   #XXX # sync with include/modsupport.h
 
-PYPY_VERSION               = (5, 3, 1, "alpha", 0)    #XXX # sync patchlevel.h
+PYPY_VERSION               = (5, 3, 2, "alpha", 0)    #XXX # sync patchlevel.h
 
 
 import pypy
diff --git a/pypy/objspace/fake/objspace.py b/pypy/objspace/fake/objspace.py
--- a/pypy/objspace/fake/objspace.py
+++ b/pypy/objspace/fake/objspace.py
@@ -417,11 +417,11 @@
 class FakeModule(W_Root):
     def __init__(self):
         self.w_dict = w_some_obj()
-
     def get(self, name):
         name + "xx"   # check that it's a string
         return w_some_obj()
 FakeObjSpace.sys = FakeModule()
 FakeObjSpace.sys.filesystemencoding = 'foobar'
 FakeObjSpace.sys.defaultencoding = 'ascii'
+FakeObjSpace.sys.dlopenflags = 123
 FakeObjSpace.builtin = FakeModule()
diff --git a/pypy/objspace/std/bytesobject.py b/pypy/objspace/std/bytesobject.py
--- a/pypy/objspace/std/bytesobject.py
+++ b/pypy/objspace/std/bytesobject.py
@@ -18,6 +18,7 @@
 from pypy.objspace.std.unicodeobject import (
     decode_object, unicode_from_encoded_object,
     unicode_from_string, getdefaultencoding)
+from pypy.objspace.std.util import IDTAG_SPECIAL, IDTAG_SHIFT
 
 
 class W_AbstractBytesObject(W_Root):
@@ -30,12 +31,26 @@
             return True
         if self.user_overridden_class or w_other.user_overridden_class:
             return False
-        return space.str_w(self) is space.str_w(w_other)
+        s1 = space.str_w(self)
+        s2 = space.str_w(w_other)
+        if len(s2) > 1:
+            return s1 is s2
+        else:            # strings of len <= 1 are unique-ified
+            return s1 == s2
 
     def immutable_unique_id(self, space):
         if self.user_overridden_class:
             return None
-        return space.wrap(compute_unique_id(space.str_w(self)))
+        s = space.str_w(self)
+        if len(s) > 1:
+            uid = compute_unique_id(s)
+        else:            # strings of len <= 1 are unique-ified
+            if len(s) == 1:
+                base = ord(s[0])     # base values 0-255
+            else:
+                base = 256           # empty string: base value 256
+            uid = (base << IDTAG_SHIFT) | IDTAG_SPECIAL
+        return space.wrap(uid)
 
     def unicode_w(self, space):
         # Use the default encoding.
diff --git a/pypy/objspace/std/setobject.py b/pypy/objspace/std/setobject.py
--- a/pypy/objspace/std/setobject.py
+++ b/pypy/objspace/std/setobject.py
@@ -6,6 +6,7 @@
 from pypy.objspace.std.bytesobject import W_BytesObject
 from pypy.objspace.std.intobject import W_IntObject
 from pypy.objspace.std.unicodeobject import W_UnicodeObject
+from pypy.objspace.std.util import IDTAG_SPECIAL, IDTAG_SHIFT
 
 from rpython.rlib.objectmodel import r_dict
 from rpython.rlib.objectmodel import iterkeys_with_hash, contains_with_hash
@@ -575,6 +576,23 @@
 class W_FrozensetObject(W_BaseSetObject):
     hash = 0
 
+    def is_w(self, space, w_other):
+        if not isinstance(w_other, W_FrozensetObject):
+            return False
+        if self is w_other:
+            return True
+        if self.user_overridden_class or w_other.user_overridden_class:
+            return False
+        # empty frozensets are unique-ified
+        return 0 == w_other.length() == self.length()
+
+    def immutable_unique_id(self, space):
+        if self.user_overridden_class or self.length() > 0:
+            return None
+        # empty frozenset: base value 259
+        uid = (259 << IDTAG_SHIFT) | IDTAG_SPECIAL
+        return space.wrap(uid)
+
     def _newobj(self, space, w_iterable):
         """Make a new frozenset by taking ownership of 'w_iterable'."""
         if type(self) is W_FrozensetObject:
diff --git a/pypy/objspace/std/stringmethods.py 
b/pypy/objspace/std/stringmethods.py
--- a/pypy/objspace/std/stringmethods.py
+++ b/pypy/objspace/std/stringmethods.py
@@ -162,7 +162,8 @@
             buffer = _get_buffer(space, w_sub)
             res = count(value, buffer, start, end)
 
-        return space.wrap(max(res, 0))
+        assert res >= 0
+        return space.wrap(res)
 
     def descr_decode(self, space, w_encoding=None, w_errors=None):
         from pypy.objspace.std.unicodeobject import (
diff --git a/pypy/objspace/std/test/test_bytearrayobject.py 
b/pypy/objspace/std/test/test_bytearrayobject.py
--- a/pypy/objspace/std/test/test_bytearrayobject.py
+++ b/pypy/objspace/std/test/test_bytearrayobject.py
@@ -211,6 +211,7 @@
 
         check(bytearray('abc').replace('b', bytearray('d')), 'adc')
         check(bytearray('abc').replace('b', 'd'), 'adc')
+        check(bytearray('').replace('a', 'ab'), '')
 
         check(bytearray('abc').upper(), 'ABC')
         check(bytearray('ABC').lower(), 'abc')
diff --git a/pypy/objspace/std/test/test_obj.py 
b/pypy/objspace/std/test/test_obj.py
--- a/pypy/objspace/std/test/test_obj.py
+++ b/pypy/objspace/std/test/test_obj.py
@@ -186,17 +186,36 @@
     def test_id_on_strs(self):
         if self.appdirect:
             skip("cannot run this test as apptest")
-        u = u"a"
-        assert id(self.unwrap_wrap_unicode(u)) == id(u)
-        s = "a"
-        assert id(self.unwrap_wrap_str(s)) == id(s)
+        for u in [u"", u"a", u"aa"]:
+            assert id(self.unwrap_wrap_unicode(u)) == id(u)
+            s = str(u)
+            assert id(self.unwrap_wrap_str(s)) == id(s)
+        #
+        assert id('') == (256 << 4) | 11     # always
+        assert id(u'') == (257 << 4) | 11
+        assert id('a') == (ord('a') << 4) | 11
+        assert id(u'\u1234') == ((~0x1234) << 4) | 11
+
+    def test_id_of_tuples(self):
+        l = []
+        x = (l,)
+        assert id(x) != id((l,))          # no caching at all
+        if self.appdirect:
+            skip("cannot run this test as apptest")
+        assert id(()) == (258 << 4) | 11     # always
+
+    def test_id_of_frozensets(self):
+        x = frozenset([4])
+        assert id(x) != id(frozenset([4]))          # no caching at all
+        if self.appdirect:
+            skip("cannot run this test as apptest")
+        assert id(frozenset()) == (259 << 4) | 11     # always
+        assert id(frozenset([])) == (259 << 4) | 11   # always
 
     def test_identity_vs_id_primitives(self):
-        if self.cpython_apptest:
-            skip("cpython behaves differently")
         import sys
-        l = range(-10, 10)
-        for i in range(10):
+        l = range(-10, 10, 2)
+        for i in [0, 1, 3]:
             l.append(float(i))
             l.append(i + 0.1)
             l.append(long(i))
@@ -206,18 +225,15 @@
             l.append(i - 1j)
             l.append(1 + i * 1j)
             l.append(1 - i * 1j)
-            s = str(i)
-            l.append(s)
-            u = unicode(s)
-            l.append(u)
+            l.append((i,))
+            l.append(frozenset([i]))
         l.append(-0.0)
         l.append(None)
         l.append(True)
         l.append(False)
-        s = "s"
-        l.append(s)
-        s = u"s"
-        l.append(s)
+        l.append(())
+        l.append(tuple([]))
+        l.append(frozenset())
 
         for i, a in enumerate(l):
             for b in l[i:]:
@@ -228,21 +244,18 @@
     def test_identity_vs_id_str(self):
         if self.appdirect:
             skip("cannot run this test as apptest")
-        import sys
-        l = range(-10, 10)
-        for i in range(10):
-            s = str(i)
+        l = []
+        def add(s, u):
             l.append(s)
             l.append(self.unwrap_wrap_str(s))
-            u = unicode(s)
+            l.append(s[:1] + s[1:])
             l.append(u)
             l.append(self.unwrap_wrap_unicode(u))
-        s = "s"
-        l.append(s)
-        l.append(self.unwrap_wrap_str(s))
-        s = u"s"
-        l.append(s)
-        l.append(self.unwrap_wrap_unicode(s))
+            l.append(u[:1] + u[1:])
+        for i in range(3, 18):
+            add(str(i), unicode(i))
+        add("s", u"s")
+        add("", u"")
 
         for i, a in enumerate(l):
             for b in l[i:]:
diff --git a/pypy/objspace/std/tupleobject.py b/pypy/objspace/std/tupleobject.py
--- a/pypy/objspace/std/tupleobject.py
+++ b/pypy/objspace/std/tupleobject.py
@@ -9,7 +9,7 @@
 from pypy.interpreter.typedef import TypeDef
 from pypy.objspace.std.sliceobject import (W_SliceObject, unwrap_start_stop,
     normalize_simple_slice)
-from pypy.objspace.std.util import negate
+from pypy.objspace.std.util import negate, IDTAG_SPECIAL, IDTAG_SHIFT
 from rpython.rlib import jit
 from rpython.rlib.debug import make_sure_not_resized
 from rpython.rlib.rarithmetic import intmask
@@ -38,6 +38,23 @@
 class W_AbstractTupleObject(W_Root):
     __slots__ = ()
 
+    def is_w(self, space, w_other):
+        if not isinstance(w_other, W_AbstractTupleObject):
+            return False
+        if self is w_other:
+            return True
+        if self.user_overridden_class or w_other.user_overridden_class:
+            return False
+        # empty tuples are unique-ified
+        return 0 == w_other.length() == self.length()
+
+    def immutable_unique_id(self, space):
+        if self.user_overridden_class or self.length() > 0:
+            return None
+        # empty tuple: base value 258
+        uid = (258 << IDTAG_SHIFT) | IDTAG_SPECIAL
+        return space.wrap(uid)
+
     def __repr__(self):
         """representation for debugging purposes"""
         reprlist = [repr(w_item) for w_item in self.tolist()]
diff --git a/pypy/objspace/std/unicodeobject.py 
b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -18,6 +18,7 @@
 from pypy.objspace.std.basestringtype import basestring_typedef
 from pypy.objspace.std.formatting import mod_format
 from pypy.objspace.std.stringmethods import StringMethods
+from pypy.objspace.std.util import IDTAG_SPECIAL, IDTAG_SHIFT
 
 __all__ = ['W_UnicodeObject', 'wrapunicode', 'plain_str2unicode',
            'encode_object', 'decode_object', 'unicode_from_object',
@@ -52,12 +53,26 @@
             return True
         if self.user_overridden_class or w_other.user_overridden_class:
             return False
-        return space.unicode_w(self) is space.unicode_w(w_other)
+        s1 = space.unicode_w(self)
+        s2 = space.unicode_w(w_other)
+        if len(s2) > 1:
+            return s1 is s2
+        else:            # strings of len <= 1 are unique-ified
+            return s1 == s2
 
     def immutable_unique_id(self, space):
         if self.user_overridden_class:
             return None
-        return space.wrap(compute_unique_id(space.unicode_w(self)))
+        s = space.unicode_w(self)
+        if len(s) > 1:
+            uid = compute_unique_id(s)
+        else:            # strings of len <= 1 are unique-ified
+            if len(s) == 1:
+                base = ~ord(s[0])      # negative base values
+            else:
+                base = 257       # empty unicode string: base value 257
+            uid = (base << IDTAG_SHIFT) | IDTAG_SPECIAL
+        return space.wrap(uid)
 
     def str_w(self, space):
         return space.str_w(space.str(self))
diff --git a/pypy/objspace/std/util.py b/pypy/objspace/std/util.py
--- a/pypy/objspace/std/util.py
+++ b/pypy/objspace/std/util.py
@@ -9,6 +9,12 @@
 IDTAG_FLOAT   = 5
 IDTAG_COMPLEX = 7
 IDTAG_METHOD  = 9
+IDTAG_SPECIAL = 11    # -1 - (-maxunicode-1): unichar
+                      # 0 - 255: char
+                      # 256: empty string
+                      # 257: empty unicode
+                      # 258: empty tuple
+                      # 259: empty frozenset
 
 CMP_OPS = dict(lt='<', le='<=', eq='==', ne='!=', gt='>', ge='>=')
 BINARY_BITWISE_OPS = {'and': '&', 'lshift': '<<', 'or': '|', 'rshift': '>>',
diff --git a/pypy/tool/release/repackage.sh b/pypy/tool/release/repackage.sh
--- a/pypy/tool/release/repackage.sh
+++ b/pypy/tool/release/repackage.sh
@@ -1,9 +1,9 @@
 # Edit these appropriately before running this script
 maj=5
 min=3
-rev=0
+rev=1
 branchname=release-$maj.x  # ==OR== release-$maj.$min.x
-tagname=release-pypy2.7-v$maj.$min  # ==OR== release-$maj.$min
+tagname=release-pypy2.7-v$maj.$min.$rev  # ==OR== release-$maj.$min
 
 echo checking hg log -r $branchname
 hg log -r $branchname || exit 1
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_virtualstate.py 
b/rpython/jit/metainterp/optimizeopt/test/test_virtualstate.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_virtualstate.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_virtualstate.py
@@ -2,7 +2,7 @@
 import py
 from rpython.jit.metainterp.optimizeopt.virtualstate import VirtualStateInfo,\
      VStructStateInfo, LEVEL_CONSTANT,\
-     VArrayStateInfo, NotVirtualStateInfo, VirtualState,\
+     VArrayStateInfo, not_virtual, VirtualState,\
      GenerateGuardState, VirtualStatesCantMatch, VArrayStructStateInfo
 from rpython.jit.metainterp.history import ConstInt, ConstPtr, TargetToken
 from rpython.jit.metainterp.resoperation import InputArgInt, InputArgRef,\
@@ -31,10 +31,10 @@
     def setup_class(self):
         classbox = self.cpu.ts.cls_of_box(InputArgRef(self.nodeaddr))
         value = info.InstancePtrInfo(None, classbox)
-        self.knownclass_info = NotVirtualStateInfo(self.cpu, 'r', value)
+        self.knownclass_info = not_virtual(self.cpu, 'r', value)
         classbox = self.cpu.ts.cls_of_box(InputArgRef(self.node2addr))
         value = info.InstancePtrInfo(None, classbox)
-        self.knownclass_info2 = NotVirtualStateInfo(self.cpu, 'r', value)
+        self.knownclass_info2 = not_virtual(self.cpu, 'r', value)
     
     def guards(self, info1, info2, box, runtime_box, expected, inputargs=None):
         if inputargs is None:
@@ -80,7 +80,7 @@
     def test_make_inputargs(self):
         optimizer = FakeOptimizer(self.cpu)
         args = [InputArgInt()]
-        info0 = NotVirtualStateInfo(self.cpu, args[0].type, None)
+        info0 = not_virtual(self.cpu, args[0].type, None)
         vs = VirtualState([info0])
         assert vs.make_inputargs(args, optimizer) == args
         info0.level = LEVEL_CONSTANT
@@ -108,8 +108,8 @@
             assert info1 in state.bad and info2 in state.bad
 
         for BoxType in (InputArgInt, InputArgFloat, InputArgRef):
-            info1 = NotVirtualStateInfo(self.cpu, BoxType.type, None)
-            info2 = NotVirtualStateInfo(self.cpu, BoxType.type, None)
+            info1 = not_virtual(self.cpu, BoxType.type, None)
+            info2 = not_virtual(self.cpu, BoxType.type, None)
             postest(info1, info2)
 
         info1, info2 = VArrayStateInfo(42), VArrayStateInfo(42)
@@ -126,9 +126,9 @@
 
     def test_NotVirtualStateInfo_generalization(self):
         def isgeneral(tp1, info1, tp2, info2):
-            info1 = NotVirtualStateInfo(self.cpu, tp1, info1)
+            info1 = not_virtual(self.cpu, tp1, info1)
             info1.position = 0
-            info2 = NotVirtualStateInfo(self.cpu, tp2, info2)
+            info2 = not_virtual(self.cpu, tp2, info2)
             info2.position = 0
             return 
VirtualState([info1]).generalization_of(VirtualState([info2]), 
FakeOptimizer(self.cpu))
 
@@ -166,8 +166,8 @@
         assert not isgeneral('r', value1, 'r', value2)
 
     def test_field_matching_generalization(self):
-        const1 = NotVirtualStateInfo(self.cpu, 'i', ConstIntBound(1))
-        const2 = NotVirtualStateInfo(self.cpu, 'i', ConstIntBound(2))
+        const1 = not_virtual(self.cpu, 'i', ConstIntBound(1))
+        const2 = not_virtual(self.cpu, 'i', ConstIntBound(2))
         const1.position = const2.position = 1
         self.check_invalid(const1, const2)
         self.check_invalid(const2, const1)
@@ -192,16 +192,16 @@
 
     def test_known_class_generalization(self):
         knownclass1 = info.InstancePtrInfo(None, ConstPtr(self.myptr))
-        info1 = NotVirtualStateInfo(self.cpu, 'r', knownclass1)
+        info1 = not_virtual(self.cpu, 'r', knownclass1)
         info1.position = 0
         knownclass2 = info.InstancePtrInfo(None, ConstPtr(self.myptr))
-        info2 = NotVirtualStateInfo(self.cpu, 'r', knownclass2)
+        info2 = not_virtual(self.cpu, 'r', knownclass2)
         info2.position = 0
         self.check_no_guards(info1, info2)
         self.check_no_guards(info2, info1)
 
         knownclass3 = info.InstancePtrInfo(None, ConstPtr(self.myptr2))
-        info3 = NotVirtualStateInfo(self.cpu, 'r', knownclass3)
+        info3 = not_virtual(self.cpu, 'r', knownclass3)
         info3.position = 0
         self.check_invalid(info1, info3)
         self.check_invalid(info2, info3)
@@ -222,26 +222,26 @@
         #unknown_val = PtrOptValue(self.nodebox)
         #unknownnull_val = PtrOptValue(BoxPtr(self.nullptr))
         opt = FakeOptimizer(self.cpu)
-        unknown_info = NotVirtualStateInfo(self.cpu, 'r', None)
+        unknown_info = not_virtual(self.cpu, 'r', None)
 
-        nonnull_info = NotVirtualStateInfo(self.cpu, 'r', 
info.NonNullPtrInfo())
+        nonnull_info = not_virtual(self.cpu, 'r', info.NonNullPtrInfo())
 
         classbox1 = self.cpu.ts.cls_of_box(ConstPtr(self.nodeaddr))
-        knownclass_info = NotVirtualStateInfo(self.cpu, 'r',
-                                    info.InstancePtrInfo(None, classbox1))
+        knownclass_info = not_virtual(self.cpu, 'r',
+                                      info.InstancePtrInfo(None, classbox1))
         classbox2 = self.cpu.ts.cls_of_box(ConstPtr(self.node2addr))
-        knownclass2_info = NotVirtualStateInfo(self.cpu, 'r',
-                                    info.InstancePtrInfo(None, classbox2))
+        knownclass2_info = not_virtual(self.cpu, 'r',
+                                       info.InstancePtrInfo(None, classbox2))
 
-        constant_info = NotVirtualStateInfo(self.cpu, 'i',
-                                            ConstIntBound(1))
-        constant_ptr_info = NotVirtualStateInfo(self.cpu, 'r',
+        constant_info = not_virtual(self.cpu, 'i',
+                                    ConstIntBound(1))
+        constant_ptr_info = not_virtual(self.cpu, 'r',
                                     info.ConstPtrInfo(ConstPtr(self.nodeaddr)))
         constclass_val = info.ConstPtrInfo(ConstPtr(self.nodeaddr))
-        constclass_info = NotVirtualStateInfo(self.cpu, 'r', constclass_val)
-        constclass2_info = NotVirtualStateInfo(self.cpu, 'r',
+        constclass_info = not_virtual(self.cpu, 'r', constclass_val)
+        constclass2_info = not_virtual(self.cpu, 'r',
                     info.ConstPtrInfo(ConstPtr(self.node2addr)))
-        constantnull_info = NotVirtualStateInfo(self.cpu, 'r',
+        constantnull_info = not_virtual(self.cpu, 'r',
                     info.ConstPtrInfo(ConstPtr(self.nullptr)))
 
         # unknown unknown
@@ -260,9 +260,10 @@
         self.check_no_guards(unknown_info, knownclass_info)
 
         # unknown constant
-        self.check_no_guards(unknown_info, constant_info,
+        unknown_info_int = not_virtual(self.cpu, 'i', None)
+        self.check_no_guards(unknown_info_int, constant_info,
                              ConstInt(1), ConstIntBound(1))
-        self.check_no_guards(unknown_info, constant_info)
+        self.check_no_guards(unknown_info_int, constant_info)
 
 
         # nonnull unknown
@@ -293,11 +294,11 @@
         const_nonnull = ConstPtr(self.nodeaddr)
         const_nonnull2 = ConstPtr(self.node2addr)
         const_null = ConstPtr(lltype.nullptr(llmemory.GCREF.TO))
-        self.check_no_guards(nonnull_info, constant_info, const_nonnull,
+        self.check_no_guards(nonnull_info, constant_ptr_info, const_nonnull,
                              info.ConstPtrInfo(const_nonnull))
         self.check_invalid(nonnull_info, constantnull_info, const_null,
                            info.ConstPtrInfo(const_null))
-        self.check_no_guards(nonnull_info, constant_info)
+        self.check_no_guards(nonnull_info, constant_ptr_info)
         self.check_invalid(nonnull_info, constantnull_info)
 
 
@@ -392,8 +393,8 @@
         value1 = IntUnbounded()
         value1.make_ge(IntBound(0, 10))
         value1.make_le(IntBound(20, 30))
-        info1 = NotVirtualStateInfo(self.cpu, 'i', value1)
-        info2 = NotVirtualStateInfo(self.cpu, 'i', IntUnbounded())
+        info1 = not_virtual(self.cpu, 'i', value1)
+        info2 = not_virtual(self.cpu, 'i', IntUnbounded())
         expected = """
         [i0]
         i1 = int_ge(i0, 0)
@@ -408,18 +409,18 @@
         value1 = IntUnbounded()
         value1.make_ge(IntBound(0, 10))
         value1.make_le(IntBound(20, 30))
-        info1 = NotVirtualStateInfo(self.cpu, 'i', value1)
-        info2 = NotVirtualStateInfo(self.cpu, 'i', ConstIntBound(10000))
+        info1 = not_virtual(self.cpu, 'i', value1)
+        info2 = not_virtual(self.cpu, 'i', ConstIntBound(10000))
         self.check_invalid(info1, info2)
-        info1 = NotVirtualStateInfo(self.cpu, 'i', value1)
-        info2 = NotVirtualStateInfo(self.cpu, 'i', ConstIntBound(11))
+        info1 = not_virtual(self.cpu, 'i', value1)
+        info2 = not_virtual(self.cpu, 'i', ConstIntBound(11))
         self.check_no_guards(info1, info2)
 
     def test_known_class(self):
         classbox = self.cpu.ts.cls_of_box(InputArgRef(self.nodeaddr))
         value1 = info.InstancePtrInfo(None, classbox)
-        info1 = NotVirtualStateInfo(self.cpu, 'r', value1)
-        info2 = NotVirtualStateInfo(self.cpu, 'r', None)
+        info1 = not_virtual(self.cpu, 'r', value1)
+        info2 = not_virtual(self.cpu, 'r', None)
         expected = """
         [p0]
         guard_nonnull_class(p0, ConstClass(node_vtable)) []        
@@ -456,18 +457,18 @@
     def test_equal_inputargs(self):
         classbox = self.cpu.ts.cls_of_box(InputArgRef(self.nodeaddr))
         value = info.InstancePtrInfo(None, classbox)
-        knownclass_info = NotVirtualStateInfo(self.cpu, 'r', value)
+        knownclass_info = not_virtual(self.cpu, 'r', value)
         vstate1 = VirtualState([knownclass_info, knownclass_info])
         assert vstate1.generalization_of(vstate1, FakeOptimizer(self.cpu))
 
-        unknown_info1 = NotVirtualStateInfo(self.cpu, 'r', None)
+        unknown_info1 = not_virtual(self.cpu, 'r', None)
         vstate2 = VirtualState([unknown_info1, unknown_info1])
         assert vstate2.generalization_of(vstate2, FakeOptimizer(self.cpu))
         assert not vstate1.generalization_of(vstate2, FakeOptimizer(self.cpu))
         assert vstate2.generalization_of(vstate1, FakeOptimizer(self.cpu))
 
-        unknown_info1 = NotVirtualStateInfo(self.cpu, 'r', None)
-        unknown_info2 = NotVirtualStateInfo(self.cpu, 'r', None)
+        unknown_info1 = not_virtual(self.cpu, 'r', None)
+        unknown_info2 = not_virtual(self.cpu, 'r', None)
         vstate3 = VirtualState([unknown_info1, unknown_info2])
         assert vstate3.generalization_of(vstate2, FakeOptimizer(self.cpu))
         assert vstate3.generalization_of(vstate1, FakeOptimizer(self.cpu))
@@ -494,9 +495,9 @@
     def test_generate_guards_on_virtual_fields_matches_array(self):
         classbox = self.cpu.ts.cls_of_box(InputArgRef(self.nodeaddr))
         innervalue1 = info.InstancePtrInfo(None, classbox)
-        innerinfo1 = NotVirtualStateInfo(self.cpu, 'r', innervalue1)
+        innerinfo1 = not_virtual(self.cpu, 'r', innervalue1)
         innerinfo1.position = 1
-        innerinfo2 = NotVirtualStateInfo(self.cpu, 'r', None)
+        innerinfo2 = not_virtual(self.cpu, 'r', None)
         innerinfo2.position = 1
 
         descr = ArrayDescr(lltype.GcArray(llmemory.GCREF), self.cpu)
@@ -524,9 +525,9 @@
     def test_generate_guards_on_virtual_fields_matches_instance(self):
         classbox = self.cpu.ts.cls_of_box(InputArgRef(self.nodeaddr))
         innervalue1 = info.InstancePtrInfo(None, classbox)
-        innerinfo1 = NotVirtualStateInfo(self.cpu, 'r', innervalue1)
+        innerinfo1 = not_virtual(self.cpu, 'r', innervalue1)
         innerinfo1.position = 1
-        innerinfo2 = NotVirtualStateInfo(self.cpu, 'r', None)
+        innerinfo2 = not_virtual(self.cpu, 'r', None)
         innerinfo2.position = 1
 
         info1 = VirtualStateInfo(ConstInt(42), [self.nextdescr])
@@ -552,9 +553,9 @@
     def test_generate_guards_on_virtual_fields_matches_struct(self):
         constclassbox = self.cpu.ts.cls_of_box(InputArgRef(self.nodeaddr))
         innervalue1 = info.InstancePtrInfo(None, constclassbox)
-        innerinfo1 = NotVirtualStateInfo(self.cpu, 'r', innervalue1)
+        innerinfo1 = not_virtual(self.cpu, 'r', innervalue1)
         innerinfo1.position = 1
-        innerinfo2 = NotVirtualStateInfo(self.cpu, 'r', None)
+        innerinfo2 = not_virtual(self.cpu, 'r', None)
         innerinfo2.position = 1
 
         structdescr = self.nodesize
@@ -583,9 +584,9 @@
     def test_generate_guards_on_virtual_fields_matches_arraystruct(self):
         constclassbox = self.cpu.ts.cls_of_box(InputArgRef(self.nodeaddr))
         innervalue1 = info.InstancePtrInfo(None, constclassbox)
-        innerinfo1 = NotVirtualStateInfo(self.cpu, 'r', innervalue1)
+        innerinfo1 = not_virtual(self.cpu, 'r', innervalue1)
         innerinfo1.position = 1
-        innerinfo2 = NotVirtualStateInfo(self.cpu, 'r', None)
+        innerinfo2 = not_virtual(self.cpu, 'r', None)
         innerinfo2.position = 1
 
         NODE = lltype.Struct('NODE', ('x', llmemory.GCREF))
@@ -627,7 +628,7 @@
         assert vstate1.generalization_of(vstate1, FakeOptimizer(self.cpu))
 
         info2 = VirtualStateInfo(ConstInt(42), [1, 2])
-        unknown_info1 = NotVirtualStateInfo(self.cpu, 'r',
+        unknown_info1 = not_virtual(self.cpu, 'r',
                                             info.InstancePtrInfo())
         info2.fieldstate = [unknown_info1, unknown_info1]
         vstate2 = VirtualState([info2])
@@ -636,9 +637,9 @@
         assert vstate2.generalization_of(vstate1, FakeOptimizer(self.cpu))
 
         info3 = VirtualStateInfo(ConstInt(42), [1, 2])
-        unknown_info1 = NotVirtualStateInfo(self.cpu, 'r',
+        unknown_info1 = not_virtual(self.cpu, 'r',
                                             info.InstancePtrInfo())
-        unknown_info2 = NotVirtualStateInfo(self.cpu, 'r',
+        unknown_info2 = not_virtual(self.cpu, 'r',
                                             info.InstancePtrInfo())
         info3.fieldstate = [unknown_info1, unknown_info2]
         vstate3 = VirtualState([info3])        
@@ -651,7 +652,7 @@
         info1 = VirtualStateInfo(ConstInt(42), [1, 2])
         classbox = self.cpu.ts.cls_of_box(InputArgRef(self.nodeaddr))
         value = info.InstancePtrInfo(None, classbox)
-        knownclass_info = NotVirtualStateInfo(self.cpu, 'r', value)
+        knownclass_info = not_virtual(self.cpu, 'r', value)
         info1.fieldstate = [knownclass_info, knownclass_info]
         vstate1 = VirtualState([info1])
         assert vstate1.generalization_of(vstate1, FakeOptimizer(self.cpu))
@@ -659,7 +660,7 @@
         info2 = VirtualStateInfo(ConstInt(42), [1, 2])
         classbox = self.cpu.ts.cls_of_box(InputArgRef(self.node2addr))
         value = info.InstancePtrInfo(None, classbox)
-        knownclass_info = NotVirtualStateInfo(self.cpu, 'r', value)
+        knownclass_info = not_virtual(self.cpu, 'r', value)
         info2.fieldstate = [knownclass_info, knownclass_info]
         vstate2 = VirtualState([info2])
         assert vstate2.generalization_of(vstate2, FakeOptimizer(self.cpu))
@@ -671,7 +672,7 @@
         info1 = VirtualStateInfo(ConstInt(42), [10, 20])
         classbox = self.cpu.ts.cls_of_box(InputArgRef(self.nodeaddr))
         value = info.InstancePtrInfo(None, classbox)
-        knownclass_info = NotVirtualStateInfo(self.cpu, 'r', value)
+        knownclass_info = not_virtual(self.cpu, 'r', value)
         info1.fieldstate = [knownclass_info, knownclass_info]
         vstate1 = VirtualState([info1])
         assert vstate1.generalization_of(vstate1, FakeOptimizer(self.cpu))
@@ -679,7 +680,7 @@
         info2 = VirtualStateInfo(ConstInt(42), [1, 2])
         classbox = self.cpu.ts.cls_of_box(InputArgRef(self.node2addr))
         value = info.InstancePtrInfo(None, classbox)
-        knownclass_info = NotVirtualStateInfo(self.cpu, 'r', value)
+        knownclass_info = not_virtual(self.cpu, 'r', value)
         info2.fieldstate = [knownclass_info, knownclass_info]
         vstate2 = VirtualState([info2])
         assert vstate2.generalization_of(vstate2, FakeOptimizer(self.cpu))
@@ -691,7 +692,7 @@
         info1 = VirtualStateInfo(ConstInt(42), [1, 2])
         classbox = self.cpu.ts.cls_of_box(InputArgRef(self.nodeaddr))
         value = info.InstancePtrInfo(None, classbox)
-        knownclass_info = NotVirtualStateInfo(self.cpu, 'r', value)
+        knownclass_info = not_virtual(self.cpu, 'r', value)
         info1.fieldstate = [knownclass_info, knownclass_info]
         vstate1 = VirtualState([info1])
         assert vstate1.generalization_of(vstate1, FakeOptimizer(self.cpu))
@@ -699,24 +700,24 @@
         info2 = VirtualStateInfo(ConstInt(7), [1, 2])
         classbox = self.cpu.ts.cls_of_box(InputArgRef(self.node2addr))
         value = info.InstancePtrInfo(None, classbox)
-        knownclass_info = NotVirtualStateInfo(self.cpu, 'r', value)
+        knownclass_info = not_virtual(self.cpu, 'r', value)
         info2.fieldstate = [knownclass_info, knownclass_info]
         vstate2 = VirtualState([info2])
         assert vstate2.generalization_of(vstate2, FakeOptimizer(self.cpu))
 
         assert not vstate2.generalization_of(vstate1, FakeOptimizer(self.cpu))
         assert not vstate1.generalization_of(vstate2, FakeOptimizer(self.cpu))
-        
+
     def test_nonvirtual_is_not_virtual(self):
         info1 = VirtualStateInfo(ConstInt(42), [1, 2])
         classbox = self.cpu.ts.cls_of_box(InputArgRef(self.nodeaddr))
         value = info.InstancePtrInfo(None, classbox)
-        knownclass_info = NotVirtualStateInfo(self.cpu, 'r', value)
+        knownclass_info = not_virtual(self.cpu, 'r', value)
         info1.fieldstate = [knownclass_info, knownclass_info]
         vstate1 = VirtualState([info1])
         assert vstate1.generalization_of(vstate1, FakeOptimizer(self.cpu))
 
-        info2 = NotVirtualStateInfo(self.cpu, 'r', value)
+        info2 = not_virtual(self.cpu, 'r', value)
         vstate2 = VirtualState([info2])
         assert vstate2.generalization_of(vstate2, FakeOptimizer(self.cpu))
 
@@ -727,7 +728,7 @@
         info1 = VArrayStateInfo(42)
         classbox = self.cpu.ts.cls_of_box(InputArgRef(self.nodeaddr))
         value = info.InstancePtrInfo(None, classbox)
-        knownclass_info = NotVirtualStateInfo(self.cpu, 'r', value)
+        knownclass_info = not_virtual(self.cpu, 'r', value)
         info1.fieldstate = [knownclass_info, knownclass_info]
         vstate1 = VirtualState([info1])
         assert vstate1.generalization_of(vstate1, FakeOptimizer(self.cpu))
@@ -735,7 +736,7 @@
         info2 = VArrayStateInfo(42)
         classbox = self.cpu.ts.cls_of_box(InputArgRef(self.node2addr))
         value = info.InstancePtrInfo(None, classbox)
-        knownclass_info = NotVirtualStateInfo(self.cpu, 'r', value)
+        knownclass_info = not_virtual(self.cpu, 'r', value)
         info2.fieldstate = [knownclass_info, knownclass_info]
         vstate2 = VirtualState([info2])
         assert vstate2.generalization_of(vstate2, FakeOptimizer(self.cpu))
@@ -747,7 +748,7 @@
         info1 = VArrayStateInfo(42)
         classbox = self.cpu.ts.cls_of_box(InputArgRef(self.nodeaddr))
         value = info.InstancePtrInfo(None, classbox)
-        knownclass_info = NotVirtualStateInfo(self.cpu, 'r', value)
+        knownclass_info = not_virtual(self.cpu, 'r', value)
         info1.fieldstate = [knownclass_info, knownclass_info]
         vstate1 = VirtualState([info1])
         assert vstate1.generalization_of(vstate1, FakeOptimizer(self.cpu))
@@ -755,7 +756,7 @@
         info2 = VArrayStateInfo(42)
         classbox = self.cpu.ts.cls_of_box(InputArgRef(self.node2addr))
         value = info.InstancePtrInfo(None, classbox)
-        knownclass_info = NotVirtualStateInfo(self.cpu, 'r', value)
+        knownclass_info = not_virtual(self.cpu, 'r', value)
         info2.fieldstate = [knownclass_info]
         vstate2 = VirtualState([info2])
         assert vstate2.generalization_of(vstate2, FakeOptimizer(self.cpu))
@@ -793,7 +794,7 @@
     def test_crash_varay_clear(self):
         classbox = self.cpu.ts.cls_of_box(InputArgRef(self.nodeaddr))
         innervalue1 = info.InstancePtrInfo(None, classbox)
-        innerinfo1 = NotVirtualStateInfo(self.cpu, 'r', innervalue1)
+        innerinfo1 = not_virtual(self.cpu, 'r', innervalue1)
         innerinfo1.position = 1
         innerinfo1.position_in_notvirtuals = 0
 
diff --git a/rpython/jit/metainterp/optimizeopt/virtualstate.py 
b/rpython/jit/metainterp/optimizeopt/virtualstate.py
--- a/rpython/jit/metainterp/optimizeopt/virtualstate.py
+++ b/rpython/jit/metainterp/optimizeopt/virtualstate.py
@@ -350,40 +350,23 @@
     def debug_header(self, indent):
         debug_print(indent + 'VArrayStructStateInfo(%d):' % self.position)
 
+
+def not_virtual(cpu, type, info):
+    if type == 'i':
+        return NotVirtualStateInfoInt(cpu, type, info)
+    if type == 'r':
+        return NotVirtualStateInfoPtr(cpu, type, info)
+    return NotVirtualStateInfo(cpu, type, info)
+
+
 class NotVirtualStateInfo(AbstractVirtualStateInfo):
-    lenbound = None
-    intbound = None
     level = LEVEL_UNKNOWN
     constbox = None
-    known_class = None
-    
+
     def __init__(self, cpu, type, info):
         if info and info.is_constant():
             self.level = LEVEL_CONSTANT
             self.constbox = info.getconst()
-            if type == 'r':
-                self.known_class = info.get_known_class(cpu)
-            elif type == 'i':
-                self.intbound = info
-        elif type == 'r':
-            if info:
-                self.known_class = info.get_known_class(cpu)
-                if self.known_class:
-                    self.level = LEVEL_KNOWNCLASS
-                elif info.is_nonnull():
-                    self.level = LEVEL_NONNULL
-                self.lenbound = info.getlenbound(None)
-        elif type == 'i':
-            if isinstance(info, IntBound):
-                if info.lower < MININT / 2:
-                    info.lower = MININT
-                if info.upper > MAXINT / 2:
-                    info.upper = MAXINT
-                self.intbound = info
-        elif type == 'f':
-            if info and info.is_constant():
-                self.level = LEVEL_CONSTANT
-                self.constbox = info.getconst()
 
     def is_const(self):
         return self.constbox is not None
@@ -394,84 +377,15 @@
     def _generate_guards(self, other, box, runtime_box, state):
         # XXX This will always retrace instead of forcing anything which
         # might be what we want sometimes?
-        if not isinstance(other, NotVirtualStateInfo):
-            raise VirtualStatesCantMatch(
-                    'The VirtualStates does not match as a ' +
-                    'virtual appears where a pointer is needed ' +
-                    'and it is too late to force it.')
-
-
         extra_guards = state.extra_guards
-        cpu = state.cpu
-        if self.lenbound:
-            if other.lenbound is None:
-                other_bound = IntLowerBound(0)
-            else:
-                other_bound = other.lenbound
-            if not self.lenbound.contains_bound(other_bound):
-                raise VirtualStatesCantMatch("length bound does not match")
-
         if self.level == LEVEL_UNKNOWN:
-            # confusingly enough, this is done also for pointers
-            # which have the full range as the "bound", so it always works
-            return self._generate_guards_intbounds(other, box, runtime_box,
-                                                   extra_guards,
-                                                   state.optimizer)
-
-        # the following conditions often peek into the runtime value that the
-        # box had when tracing. This value is only used as an educated guess.
-        # It is used here to choose between either emitting a guard and jumping
-        # to an existing compiled loop or retracing the loop. Both alternatives
-        # will always generate correct behaviour, but performance will differ.
-        elif self.level == LEVEL_NONNULL:
-            if other.level == LEVEL_UNKNOWN:
-                if runtime_box is not None and runtime_box.nonnull():
-                    op = ResOperation(rop.GUARD_NONNULL, [box])
-                    extra_guards.append(op)
-                    return
-                else:
-                    raise VirtualStatesCantMatch("other not known to be 
nonnull")
-            elif other.level == LEVEL_NONNULL:
-                return
-            elif other.level == LEVEL_KNOWNCLASS:
-                return # implies nonnull
-            else:
-                assert other.level == LEVEL_CONSTANT
-                assert other.constbox
-                if not other.constbox.nonnull():
-                    raise VirtualStatesCantMatch("constant is null")
-                return
-
-        elif self.level == LEVEL_KNOWNCLASS:
-            if other.level == LEVEL_UNKNOWN:
-                if (runtime_box and runtime_box.nonnull() and
-              self.known_class.same_constant(cpu.ts.cls_of_box(runtime_box))):
-                    op = ResOperation(rop.GUARD_NONNULL_CLASS, [box, 
self.known_class])
-                    extra_guards.append(op)
-                    return
-                else:
-                    raise VirtualStatesCantMatch("other's class is unknown")
-            elif other.level == LEVEL_NONNULL:
-                if (runtime_box and self.known_class.same_constant(
-                        cpu.ts.cls_of_box(runtime_box))):
-                    op = ResOperation(rop.GUARD_CLASS, [box, self.known_class])
-                    extra_guards.append(op)
-                    return
-                else:
-                    raise VirtualStatesCantMatch("other's class is unknown")
-            elif other.level == LEVEL_KNOWNCLASS:
-                if self.known_class.same_constant(other.known_class):
-                    return
-                raise VirtualStatesCantMatch("classes don't match")
-            else:
-                assert other.level == LEVEL_CONSTANT
-                if (other.constbox.nonnull() and
-                        
self.known_class.same_constant(cpu.ts.cls_of_box(other.constbox))):
-                    return
-                else:
-                    raise VirtualStatesCantMatch("classes don't match")
-
+            return self._generate_guards_unkown(other, box, runtime_box,
+                                                extra_guards,
+                                                state)
         else:
+            if not isinstance(other, NotVirtualStateInfo):
+                raise VirtualStatesCantMatch(
+                        'comparing a constant against something that is a 
virtual')
             assert self.level == LEVEL_CONSTANT
             if other.level == LEVEL_CONSTANT:
                 if self.constbox.same_constant(other.constbox):
@@ -485,19 +399,9 @@
                 raise VirtualStatesCantMatch("other not constant")
         assert 0, "unreachable"
 
-    def _generate_guards_intbounds(self, other, box, runtime_box, extra_guards,
-                                   optimizer):
-        if self.intbound is None:
-            return
-        if self.intbound.contains_bound(other.intbound):
-            return
-        if (runtime_box is not None and
-            self.intbound.contains(runtime_box.getint())):
-            # this may generate a few more guards than needed, but they are
-            # optimized away when emitting them
-            self.intbound.make_guards(box, extra_guards, optimizer)
-            return
-        raise VirtualStatesCantMatch("intbounds don't match")
+    def _generate_guards_unkown(self, other, box, runtime_box, extra_guards,
+                                state):
+        return
 
     def enum_forced_boxes(self, boxes, box, optimizer, force_boxes=False):
         if self.level == LEVEL_CONSTANT:
@@ -553,8 +457,145 @@
         if self.lenbound:
             lb = ', ' + self.lenbound.bound.__repr__()
 
-        debug_print(indent + mark + 'NotVirtualInfo(%d' % self.position +
-                    ', ' + l + ', ' + self.intbound.__repr__() + lb + ')')
+        result = indent + mark + 'NotVirtualStateInfo(%d' % self.position + ', 
' + l
+        extra = self._extra_repr()
+        if extra:
+            result += ', ' + extra
+        result += lb + ')'
+        debug_print(result)
+
+class NotVirtualStateInfoInt(NotVirtualStateInfo):
+    intbound = None
+
+    def __init__(self, cpu, type, info):
+        NotVirtualStateInfo.__init__(self, cpu, type, info)
+        assert type == 'i'
+        if isinstance(info, IntBound):
+            if info.lower < MININT / 2:
+                info.lower = MININT
+            if info.upper > MAXINT / 2:
+                info.upper = MAXINT
+            self.intbound = info
+
+    def _generate_guards_unkown(self, other, box, runtime_box, extra_guards,
+                                state):
+        other_intbound = None
+        if isinstance(other, NotVirtualStateInfoInt):
+            other_intbound = other.intbound
+        if self.intbound is None:
+            return
+        if self.intbound.contains_bound(other_intbound):
+            return
+        if (runtime_box is not None and
+            self.intbound.contains(runtime_box.getint())):
+            # this may generate a few more guards than needed, but they are
+            # optimized away when emitting them
+            self.intbound.make_guards(box, extra_guards, state.optimizer)
+            return
+        raise VirtualStatesCantMatch("intbounds don't match")
+
+    def _extra_repr(self):
+        return self.intbound.__repr__()
+
+
+class NotVirtualStateInfoPtr(NotVirtualStateInfo):
+    lenbound = None
+    known_class = None
+
+    def __init__(self, cpu, type, info):
+        if info:
+            self.known_class = info.get_known_class(cpu)
+            if self.known_class:
+                self.level = LEVEL_KNOWNCLASS
+            elif info.is_nonnull():
+                self.level = LEVEL_NONNULL
+            self.lenbound = info.getlenbound(None)
+        # might set it to LEVEL_CONSTANT
+        NotVirtualStateInfo.__init__(self, cpu, type, info)
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to