[pypy-commit] pypy fix-vmprof-stacklet-switch-2: merge default

antocuni Sat, 16 Dec 2017 03:03:07 -0800

Author: Antonio Cuni <anto.c...@gmail.com>
Branch: fix-vmprof-stacklet-switch-2
Changeset: r93446:92e4ca3c2daa
Date: 2017-12-16 12:02 +0100
http://bitbucket.org/pypy/pypy/changeset/92e4ca3c2daa/


Log:    merge default

diff too long, truncating to 2000 out of 2875 lines

diff --git a/pypy/module/test_lib_pypy/test_json_extra.py 
b/extra_tests/test_json.py
rename from pypy/module/test_lib_pypy/test_json_extra.py
rename to extra_tests/test_json.py
--- a/pypy/module/test_lib_pypy/test_json_extra.py
+++ b/extra_tests/test_json.py
@@ -1,4 +1,6 @@
-import py, json
+import pytest
+import json
+from hypothesis import given, strategies
 
 def is_(x, y):
     return type(x) is type(y) and x == y
@@ -6,12 +8,26 @@
 def test_no_ensure_ascii():
     assert is_(json.dumps(u"\u1234", ensure_ascii=False), u'"\u1234"')
     assert is_(json.dumps("\xc0", ensure_ascii=False), '"\xc0"')
-    e = py.test.raises(UnicodeDecodeError, json.dumps,
-                       (u"\u1234", "\xc0"), ensure_ascii=False)
-    assert str(e.value).startswith("'ascii' codec can't decode byte 0xc0 ")
-    e = py.test.raises(UnicodeDecodeError, json.dumps,
-                       ("\xc0", u"\u1234"), ensure_ascii=False)
-    assert str(e.value).startswith("'ascii' codec can't decode byte 0xc0 ")
+    with pytest.raises(UnicodeDecodeError) as excinfo:
+        json.dumps((u"\u1234", "\xc0"), ensure_ascii=False)
+    assert str(excinfo.value).startswith(
+        "'ascii' codec can't decode byte 0xc0 ")
+    with pytest.raises(UnicodeDecodeError) as excinfo:
+        json.dumps(("\xc0", u"\u1234"), ensure_ascii=False)
+    assert str(excinfo.value).startswith(
+        "'ascii' codec can't decode byte 0xc0 ")
 
 def test_issue2191():
     assert is_(json.dumps(u"xxx", ensure_ascii=False), u'"xxx"')
+
+jsondata = strategies.recursive(
+    strategies.none() |
+    strategies.booleans() |
+    strategies.floats(allow_nan=False) |
+    strategies.text(),
+    lambda children: strategies.lists(children) |
+        strategies.dictionaries(strategies.text(), children))
+
+@given(jsondata)
+def test_roundtrip(d):
+    assert json.loads(json.dumps(d)) == d
diff --git a/extra_tests/test_textio.py b/extra_tests/test_textio.py
--- a/extra_tests/test_textio.py
+++ b/extra_tests/test_textio.py
@@ -1,28 +1,48 @@
 from hypothesis import given, strategies as st
 
 from io import BytesIO, TextIOWrapper
+import os
 
-LINESEP = ['', '\r', '\n', '\r\n']
+def translate_newlines(text):
+    text = text.replace('\r\n', '\n')
+    text = text.replace('\r', '\n')
+    return text.replace('\n', os.linesep)
 
 @st.composite
-def text_with_newlines(draw):
-    sep = draw(st.sampled_from(LINESEP))
-    lines = draw(st.lists(st.text(max_size=10), max_size=10))
-    return sep.join(lines)
+def st_readline_universal(
+        draw, st_nlines=st.integers(min_value=0, max_value=10)):
+    n_lines = draw(st_nlines)
+    lines = draw(st.lists(
+        st.text(st.characters(blacklist_characters='\r\n')),
+        min_size=n_lines, max_size=n_lines))
+    limits = []
+    for line in lines:
+        limit = draw(st.integers(min_value=0, max_value=len(line) + 5))
+        limits.append(limit)
+        limits.append(-1)
+    endings = draw(st.lists(
+        st.sampled_from(['\n', '\r', '\r\n']),
+        min_size=n_lines, max_size=n_lines))
+    return (
+        ''.join(line + ending for line, ending in zip(lines, endings)),
+        limits)
 
-@given(txt=text_with_newlines(),
-       mode=st.sampled_from(['\r', '\n', '\r\n', '']),
-       limit=st.integers(min_value=-1))
-def test_readline(txt, mode, limit):
+@given(data=st_readline_universal(),
+       mode=st.sampled_from(['\r', '\n', '\r\n', '', None]))
+def test_readline(data, mode):
+    txt, limits = data
     textio = TextIOWrapper(
-        BytesIO(txt.encode('utf-8')), encoding='utf-8', newline=mode)
+        BytesIO(txt.encode('utf-8', 'surrogatepass')),
+        encoding='utf-8', errors='surrogatepass', newline=mode)
     lines = []
-    while True:
+    for limit in limits:
         line = textio.readline(limit)
-        if limit > 0:
-            assert len(line) < limit
+        if limit >= 0:
+            assert len(line) <= limit
         if line:
             lines.append(line)
-        else:
+        elif limit:
             break
-    assert u''.join(lines) == txt
+    if mode is None:
+        txt = translate_newlines(txt)
+    assert txt.startswith(u''.join(lines))
diff --git a/lib_pypy/resource.py b/lib_pypy/resource.py
--- a/lib_pypy/resource.py
+++ b/lib_pypy/resource.py
@@ -20,6 +20,7 @@
 or via the attributes ru_utime, ru_stime, ru_maxrss, and so on."""
 
     __metaclass__ = _structseq.structseqtype
+    name = "resource.struct_rusage"
 
     ru_utime = _structseq.structseqfield(0,    "user time used")
     ru_stime = _structseq.structseqfield(1,    "system time used")
diff --git a/pypy/doc/build.rst b/pypy/doc/build.rst
--- a/pypy/doc/build.rst
+++ b/pypy/doc/build.rst
@@ -149,7 +149,7 @@
     xz-devel # For lzma on PyPy3.
     (XXX plus the SLES11 version of libgdbm-dev and tk-dev)
 
-On Mac OS X::
+On Mac OS X:
 
 Most of these build-time dependencies are installed alongside
 the Developer Tools. However, note that in order for the installation to
diff --git a/pypy/doc/cpython_differences.rst b/pypy/doc/cpython_differences.rst
--- a/pypy/doc/cpython_differences.rst
+++ b/pypy/doc/cpython_differences.rst
@@ -355,7 +355,11 @@
 containers (as list items or in sets for example), the exact rule of
 equality used is "``if x is y or x == y``" (on both CPython and PyPy);
 as a consequence, because all ``nans`` are identical in PyPy, you
-cannot have several of them in a set, unlike in CPython.  (Issue `#1974`__)
+cannot have several of them in a set, unlike in CPython.  (Issue `#1974`__).
+Another consequence is that ``cmp(float('nan'), float('nan')) == 0``, because
+``cmp`` checks with ``is`` first whether the arguments are identical (there is
+no good value to return from this call to ``cmp``, because ``cmp`` pretends
+that there is a total order on floats, but that is wrong for NaNs).
 
 .. __: 
https://bitbucket.org/pypy/pypy/issue/1974/different-behaviour-for-collections-of
 
diff --git a/pypy/doc/how-to-release.rst b/pypy/doc/how-to-release.rst
--- a/pypy/doc/how-to-release.rst
+++ b/pypy/doc/how-to-release.rst
@@ -62,7 +62,7 @@
   * go to pypy/tool/release and run
     ``force-builds.py <release branch>``
     The following JIT binaries should be built, however, we need more buildbots
-    windows, linux-32, linux-64, osx64, armhf-raring, armhf-raspberrian, armel,
+    windows, linux-32, linux-64, osx64, armhf-raspberrian, armel,
     freebsd64 
 
   * wait for builds to complete, make sure there are no failures
diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst
--- a/pypy/doc/whatsnew-head.rst
+++ b/pypy/doc/whatsnew-head.rst
@@ -1,30 +1,42 @@
-===========================
-What's new in PyPy2.7 5.10+
-===========================
-
-.. this is a revision shortly after release-pypy2.7-v5.9.0
-.. startrev:d56dadcef996
-
-.. branch: cppyy-packaging
-Cleanup and improve cppyy packaging
-
-.. branch: docs-osx-brew-openssl
-
-.. branch: keep-debug-symbols
-Add a smartstrip tool, which can optionally keep the debug symbols in a
-separate file, instead of just stripping them away. Use it in packaging
-
-.. branch: bsd-patches
-Fix failures on FreeBSD, contributed by David Naylor as patches on the issue
-tracker (issues 2694, 2695, 2696, 2697)
-
-.. branch: run-extra-tests
-Run extra_tests/ in buildbot
-
-.. branch: vmprof-0.4.10
-Upgrade the _vmprof backend to vmprof 0.4.10
-
-.. branch: fix-vmprof-stacklet-switch
-.. branch: fix-vmprof-stacklet-switch-2
-Fix vmprof+ continulet (i.e. greenelts, eventlet, gevent, ...)
-
+===========================
+What's new in PyPy2.7 5.10+
+===========================
+
+.. this is a revision shortly after release-pypy2.7-v5.9.0
+.. startrev:d56dadcef996
+
+
+.. branch: cppyy-packaging
+
+Cleanup and improve cppyy packaging
+
+.. branch: docs-osx-brew-openssl
+
+.. branch: keep-debug-symbols
+
+Add a smartstrip tool, which can optionally keep the debug symbols in a
+separate file, instead of just stripping them away. Use it in packaging
+
+.. branch: bsd-patches
+
+Fix failures on FreeBSD, contributed by David Naylor as patches on the issue
+tracker (issues 2694, 2695, 2696, 2697)
+
+.. branch: run-extra-tests
+
+Run extra_tests/ in buildbot
+
+.. branch: vmprof-0.4.10
+
+Upgrade the _vmprof backend to vmprof 0.4.10
+
+.. branch: fix-vmprof-stacklet-switch
+.. branch: fix-vmprof-stacklet-switch-2
+Fix a vmprof+continulets (i.e. greenelts, eventlet, gevent, ...)
+
+.. branch: win32-vcvars
+
+.. branch: rdict-fast-hash
+
+Make it possible to declare that the hash function of an r_dict is fast in 
RPython.
+
diff --git a/pypy/doc/whatsnew-pypy2-5.6.0.rst 
b/pypy/doc/whatsnew-pypy2-5.6.0.rst
--- a/pypy/doc/whatsnew-pypy2-5.6.0.rst
+++ b/pypy/doc/whatsnew-pypy2-5.6.0.rst
@@ -101,7 +101,7 @@
 
 .. branch: newinitwarn
 
-Match CPython's stricter handling of __new/init__ arguments
+Match CPython's stricter handling of ``__new__``/``__init__`` arguments
 
 .. branch: openssl-1.1
 
diff --git a/pypy/doc/windows.rst b/pypy/doc/windows.rst
--- a/pypy/doc/windows.rst
+++ b/pypy/doc/windows.rst
@@ -11,7 +11,7 @@
 
 To build pypy-c you need a working python environment, and a C compiler.
 It is possible to translate with a CPython 2.6 or later, but this is not
-the preferred way, because it will take a lot longer to run &#65533; depending
+the preferred way, because it will take a lot longer to run &#8211; depending
 on your architecture, between two and three times as long. So head to
 `our downloads`_ and get the latest stable version.
 
@@ -103,6 +103,7 @@
 must also copy the ``vcvarsall.bat`` file fron the ``...\9.0`` directory to the
 ``...\9.0\VC`` directory, and edit it, changing the lines that set
 ``VCINSTALLDIR`` and ``WindowsSdkDir``::
+
     set VCINSTALLDIR=%~dp0\
     set WindowsSdkDir=%~dp0\..\WinSDK\
 
diff --git a/pypy/interpreter/astcompiler/test/test_astbuilder.py 
b/pypy/interpreter/astcompiler/test/test_astbuilder.py
--- a/pypy/interpreter/astcompiler/test/test_astbuilder.py
+++ b/pypy/interpreter/astcompiler/test/test_astbuilder.py
@@ -1246,3 +1246,7 @@
         exc = py.test.raises(SyntaxError, self.get_ast, input).value
         assert exc.msg == ("(unicode error) 'unicodeescape' codec can't decode"
                            " bytes in position 0-1: truncated \\xXX escape")
+        input = "u'\\x1'"
+        exc = py.test.raises(SyntaxError, self.get_ast, input).value
+        assert exc.msg == ("(unicode error) 'unicodeescape' codec can't decode"
+                           " bytes in position 0-2: truncated \\xXX escape")
diff --git a/pypy/interpreter/test/test_unicodehelper.py 
b/pypy/interpreter/test/test_unicodehelper.py
--- a/pypy/interpreter/test/test_unicodehelper.py
+++ b/pypy/interpreter/test/test_unicodehelper.py
@@ -1,4 +1,7 @@
-from pypy.interpreter.unicodehelper import encode_utf8, decode_utf8
+import pytest
+import struct
+from pypy.interpreter.unicodehelper import (
+    encode_utf8, decode_utf8, unicode_encode_utf_32_be)
 
 class FakeSpace:
     pass
@@ -24,3 +27,23 @@
     assert map(ord, got) == [0xd800, 0xdc00]
     got = decode_utf8(space, "\xf0\x90\x80\x80")
     assert map(ord, got) == [0x10000]
+
+@pytest.mark.parametrize('unich', [u"\ud800", u"\udc80"])
+def test_utf32_surrogates(unich):
+    assert (unicode_encode_utf_32_be(unich, 1, None) ==
+            struct.pack('>i', ord(unich)))
+    with pytest.raises(UnicodeEncodeError):
+        unicode_encode_utf_32_be(unich, 1, None, allow_surrogates=False)
+
+    def replace_with(ru, rs):
+        def errorhandler(errors, enc, msg, u, startingpos, endingpos):
+            if errors == 'strict':
+                raise UnicodeEncodeError(enc, u, startingpos, endingpos, msg)
+            return ru, rs, endingpos
+        return unicode_encode_utf_32_be(
+            u"<%s>" % unich, 3, None,
+            errorhandler, allow_surrogates=False)
+
+    assert replace_with(u'rep', None) == u'<rep>'.encode('utf-32-be')
+    assert (replace_with(None, '\xca\xfe\xca\xfe') ==
+            '\x00\x00\x00<\xca\xfe\xca\xfe\x00\x00\x00>')
diff --git a/pypy/interpreter/unicodehelper.py 
b/pypy/interpreter/unicodehelper.py
--- a/pypy/interpreter/unicodehelper.py
+++ b/pypy/interpreter/unicodehelper.py
@@ -1,7 +1,11 @@
+from rpython.rlib.objectmodel import specialize
+from rpython.rlib.rarithmetic import intmask
+from rpython.rlib.rstring import StringBuilder, UnicodeBuilder
+from rpython.rlib import runicode
+from rpython.rlib.runicode import (
+    default_unicode_error_encode, default_unicode_error_decode,
+    MAXUNICODE, BYTEORDER, BYTEORDER2, UNICHR)
 from pypy.interpreter.error import OperationError
-from rpython.rlib.objectmodel import specialize
-from rpython.rlib import runicode
-from pypy.module._codecs import interp_codecs
 
 @specialize.memo()
 def decode_error_handler(space):
@@ -37,6 +41,7 @@
 
 # These functions take and return unwrapped rpython strings and unicodes
 def decode_unicode_escape(space, string):
+    from pypy.module._codecs import interp_codecs
     state = space.fromcache(interp_codecs.CodecState)
     unicodedata_handler = state.get_unicodedata_handler(space)
     result, consumed = runicode.str_decode_unicode_escape(
@@ -71,3 +76,229 @@
         uni, len(uni), "strict",
         errorhandler=None,
         allow_surrogates=True)
+
+# ____________________________________________________________
+# utf-32
+
+def str_decode_utf_32(s, size, errors, final=True,
+                      errorhandler=None):
+    result, length, byteorder = str_decode_utf_32_helper(
+        s, size, errors, final, errorhandler, "native")
+    return result, length
+
+def str_decode_utf_32_be(s, size, errors, final=True,
+                         errorhandler=None):
+    result, length, byteorder = str_decode_utf_32_helper(
+        s, size, errors, final, errorhandler, "big")
+    return result, length
+
+def str_decode_utf_32_le(s, size, errors, final=True,
+                         errorhandler=None):
+    result, length, byteorder = str_decode_utf_32_helper(
+        s, size, errors, final, errorhandler, "little")
+    return result, length
+
+def py3k_str_decode_utf_32(s, size, errors, final=True,
+                           errorhandler=None):
+    result, length, byteorder = str_decode_utf_32_helper(
+        s, size, errors, final, errorhandler, "native", 'utf-32-' + BYTEORDER2)
+    return result, length
+
+def py3k_str_decode_utf_32_be(s, size, errors, final=True,
+                              errorhandler=None):
+    result, length, byteorder = str_decode_utf_32_helper(
+        s, size, errors, final, errorhandler, "big", 'utf-32-be')
+    return result, length
+
+def py3k_str_decode_utf_32_le(s, size, errors, final=True,
+                              errorhandler=None):
+    result, length, byteorder = str_decode_utf_32_helper(
+        s, size, errors, final, errorhandler, "little", 'utf-32-le')
+    return result, length
+
+BOM32_DIRECT = intmask(0x0000FEFF)
+BOM32_REVERSE = intmask(0xFFFE0000)
+
+def str_decode_utf_32_helper(s, size, errors, final=True,
+                             errorhandler=None,
+                             byteorder="native",
+                             public_encoding_name='utf32'):
+    if errorhandler is None:
+        errorhandler = default_unicode_error_decode
+    bo = 0
+
+    if BYTEORDER == 'little':
+        iorder = [0, 1, 2, 3]
+    else:
+        iorder = [3, 2, 1, 0]
+
+    #  Check for BOM marks (U+FEFF) in the input and adjust current
+    #  byte order setting accordingly. In native mode, the leading BOM
+    #  mark is skipped, in all other modes, it is copied to the output
+    #  stream as-is (giving a ZWNBSP character).
+    pos = 0
+    if byteorder == 'native':
+        if size >= 4:
+            bom = intmask(
+                (ord(s[iorder[3]]) << 24) | (ord(s[iorder[2]]) << 16) |
+                (ord(s[iorder[1]]) << 8) | ord(s[iorder[0]]))
+            if BYTEORDER == 'little':
+                if bom == BOM32_DIRECT:
+                    pos += 4
+                    bo = -1
+                elif bom == BOM32_REVERSE:
+                    pos += 4
+                    bo = 1
+            else:
+                if bom == BOM32_DIRECT:
+                    pos += 4
+                    bo = 1
+                elif bom == BOM32_REVERSE:
+                    pos += 4
+                    bo = -1
+    elif byteorder == 'little':
+        bo = -1
+    else:
+        bo = 1
+    if size == 0:
+        return u'', 0, bo
+    if bo == -1:
+        # force little endian
+        iorder = [0, 1, 2, 3]
+    elif bo == 1:
+        # force big endian
+        iorder = [3, 2, 1, 0]
+
+    result = UnicodeBuilder(size // 4)
+
+    while pos < size:
+        # remaining bytes at the end? (size should be divisible by 4)
+        if len(s) - pos < 4:
+            if not final:
+                break
+            r, pos = errorhandler(errors, public_encoding_name,
+                                  "truncated data",
+                                  s, pos, len(s))
+            result.append(r)
+            if len(s) - pos < 4:
+                break
+            continue
+        ch = ((ord(s[pos + iorder[3]]) << 24) | (ord(s[pos + iorder[2]]) << 
16) |
+            (ord(s[pos + iorder[1]]) << 8) | ord(s[pos + iorder[0]]))
+        if ch >= 0x110000:
+            r, pos = errorhandler(errors, public_encoding_name,
+                                  "codepoint not in range(0x110000)",
+                                  s, pos, len(s))
+            result.append(r)
+            continue
+
+        if MAXUNICODE < 65536 and ch >= 0x10000:
+            ch -= 0x10000L
+            result.append(unichr(0xD800 + (ch >> 10)))
+            result.append(unichr(0xDC00 + (ch & 0x03FF)))
+        else:
+            result.append(UNICHR(ch))
+        pos += 4
+    return result.build(), pos, bo
+
+def _STORECHAR32(result, CH, byteorder):
+    c0 = chr(((CH) >> 24) & 0xff)
+    c1 = chr(((CH) >> 16) & 0xff)
+    c2 = chr(((CH) >> 8) & 0xff)
+    c3 = chr((CH) & 0xff)
+    if byteorder == 'little':
+        result.append(c3)
+        result.append(c2)
+        result.append(c1)
+        result.append(c0)
+    else:
+        result.append(c0)
+        result.append(c1)
+        result.append(c2)
+        result.append(c3)
+
+def unicode_encode_utf_32_helper(s, size, errors,
+                                 errorhandler=None,
+                                 allow_surrogates=True,
+                                 byteorder='little',
+                                 public_encoding_name='utf32'):
+    if errorhandler is None:
+        errorhandler = default_unicode_error_encode
+    if size == 0:
+        if byteorder == 'native':
+            result = StringBuilder(4)
+            _STORECHAR32(result, 0xFEFF, BYTEORDER)
+            return result.build()
+        return ""
+
+    result = StringBuilder(size * 4 + 4)
+    if byteorder == 'native':
+        _STORECHAR32(result, 0xFEFF, BYTEORDER)
+        byteorder = BYTEORDER
+
+    pos = 0
+    while pos < size:
+        ch = ord(s[pos])
+        pos += 1
+        ch2 = 0
+        if not allow_surrogates and 0xD800 <= ch < 0xE000:
+            ru, rs, pos = errorhandler(
+                errors, public_encoding_name, 'surrogates not allowed',
+                s, pos - 1, pos)
+            if rs is not None:
+                # py3k only
+                if len(rs) % 4 != 0:
+                    errorhandler(
+                        'strict', public_encoding_name, 'surrogates not 
allowed',
+                        s, pos - 1, pos)
+                result.append(rs)
+                continue
+            for ch in ru:
+                if ord(ch) < 0xD800:
+                    _STORECHAR32(result, ord(ch), byteorder)
+                else:
+                    errorhandler(
+                        'strict', public_encoding_name,
+                        'surrogates not allowed', s, pos - 1, pos)
+            continue
+        if 0xD800 <= ch < 0xDC00 and MAXUNICODE < 65536 and pos < size:
+            ch2 = ord(s[pos])
+            if 0xDC00 <= ch2 < 0xE000:
+                ch = (((ch & 0x3FF) << 10) | (ch2 & 0x3FF)) + 0x10000
+                pos += 1
+        _STORECHAR32(result, ch, byteorder)
+
+    return result.build()
+
+def unicode_encode_utf_32(s, size, errors,
+                          errorhandler=None, allow_surrogates=True):
+    return unicode_encode_utf_32_helper(s, size, errors, errorhandler,
+                                        allow_surrogates, "native")
+
+def unicode_encode_utf_32_be(s, size, errors,
+                             errorhandler=None, allow_surrogates=True):
+    return unicode_encode_utf_32_helper(s, size, errors, errorhandler,
+                                        allow_surrogates, "big")
+
+def unicode_encode_utf_32_le(s, size, errors,
+                             errorhandler=None, allow_surrogates=True):
+    return unicode_encode_utf_32_helper(s, size, errors, errorhandler,
+                                        allow_surrogates, "little")
+
+def py3k_unicode_encode_utf_32(s, size, errors,
+                               errorhandler=None, allow_surrogates=True):
+    return unicode_encode_utf_32_helper(s, size, errors, errorhandler,
+                                        allow_surrogates, "native",
+                                        'utf-32-' + BYTEORDER2)
+
+def py3k_unicode_encode_utf_32_be(s, size, errors,
+                                  errorhandler=None, allow_surrogates=True):
+    return unicode_encode_utf_32_helper(s, size, errors, errorhandler,
+                                        allow_surrogates, "big",
+                                        'utf-32-be')
+
+def py3k_unicode_encode_utf_32_le(s, size, errors,
+                                  errorhandler=None, allow_surrogates=True):
+    return unicode_encode_utf_32_helper(s, size, errors, errorhandler,
+                                        allow_surrogates, "little",
+                                        'utf-32-le')
diff --git a/pypy/module/__builtin__/test/test_builtin.py 
b/pypy/module/__builtin__/test/test_builtin.py
--- a/pypy/module/__builtin__/test/test_builtin.py
+++ b/pypy/module/__builtin__/test/test_builtin.py
@@ -404,6 +404,7 @@
 
 
     def test_cmp(self):
+        assert cmp(float('nan'), float('nan')) == 0
         assert cmp(9,9) == 0
         assert cmp(0,9) < 0
         assert cmp(9,0) > 0
diff --git a/pypy/module/_codecs/interp_codecs.py 
b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -1,10 +1,12 @@
 from rpython.rlib import jit
 from rpython.rlib.objectmodel import we_are_translated, not_rpython
 from rpython.rlib.rstring import UnicodeBuilder
+from rpython.rlib import runicode
 from rpython.rlib.runicode import code_to_unichr, MAXUNICODE
 
 from pypy.interpreter.error import OperationError, oefmt
 from pypy.interpreter.gateway import interp2app, unwrap_spec, WrappedDefault
+from pypy.interpreter import unicodehelper
 
 
 class VersionTag(object):
@@ -210,7 +212,8 @@
 def xmlcharrefreplace_errors(space, w_exc):
     check_exception(space, w_exc)
     if space.isinstance_w(w_exc, space.w_UnicodeEncodeError):
-        obj = space.realunicode_w(space.getattr(w_exc, 
space.newtext('object')))
+        w_obj = space.getattr(w_exc, space.newtext('object'))
+        obj = space.realunicode_w(w_obj)
         start = space.int_w(space.getattr(w_exc, space.newtext('start')))
         w_end = space.getattr(w_exc, space.newtext('end'))
         end = space.int_w(w_end)
@@ -236,7 +239,8 @@
 def backslashreplace_errors(space, w_exc):
     check_exception(space, w_exc)
     if space.isinstance_w(w_exc, space.w_UnicodeEncodeError):
-        obj = space.realunicode_w(space.getattr(w_exc, 
space.newtext('object')))
+        w_obj = space.getattr(w_exc, space.newtext('object'))
+        obj = space.realunicode_w(w_obj)
         start = space.int_w(space.getattr(w_exc, space.newtext('start')))
         w_end = space.getattr(w_exc, space.newtext('end'))
         end = space.int_w(w_end)
@@ -363,19 +367,23 @@
         raise oefmt(space.w_TypeError, "handler must be callable")
 
 # ____________________________________________________________
-# delegation to runicode
+# delegation to runicode/unicodehelper
 
-from rpython.rlib import runicode
+def _find_implementation(impl_name):
+    try:
+        func = getattr(unicodehelper, impl_name)
+    except AttributeError:
+        func = getattr(runicode, impl_name)
+    return func
 
 def make_encoder_wrapper(name):
     rname = "unicode_encode_%s" % (name.replace("_encode", ""), )
-    assert hasattr(runicode, rname)
+    func = _find_implementation(rname)
     @unwrap_spec(uni=unicode, errors='text_or_none')
     def wrap_encoder(space, uni, errors="strict"):
         if errors is None:
             errors = 'strict'
         state = space.fromcache(CodecState)
-        func = getattr(runicode, rname)
         result = func(uni, len(uni), errors, state.encode_error_handler)
         return space.newtuple([space.newbytes(result), space.newint(len(uni))])
     wrap_encoder.func_name = rname
@@ -383,7 +391,7 @@
 
 def make_decoder_wrapper(name):
     rname = "str_decode_%s" % (name.replace("_decode", ""), )
-    assert hasattr(runicode, rname)
+    func = _find_implementation(rname)
     @unwrap_spec(string='bufferstr', errors='text_or_none',
                  w_final=WrappedDefault(False))
     def wrap_decoder(space, string, errors="strict", w_final=None):
@@ -391,7 +399,6 @@
             errors = 'strict'
         final = space.is_true(w_final)
         state = space.fromcache(CodecState)
-        func = getattr(runicode, rname)
         result, consumed = func(string, len(string), errors,
                                 final, state.decode_error_handler)
         return space.newtuple([space.newunicode(result), 
space.newint(consumed)])
diff --git a/pypy/module/_codecs/test/test_codecs.py 
b/pypy/module/_codecs/test/test_codecs.py
--- a/pypy/module/_codecs/test/test_codecs.py
+++ b/pypy/module/_codecs/test/test_codecs.py
@@ -115,10 +115,10 @@
         raises(TypeError, charmap_decode, '\xff', "strict",  {0xff: 0x110000})
         assert (charmap_decode("\x00\x01\x02", "strict",
                                {0: 0x10FFFF, 1: ord('b'), 2: ord('c')}) ==
-                u"\U0010FFFFbc", 3)
+                (u"\U0010FFFFbc", 3))
         assert (charmap_decode("\x00\x01\x02", "strict",
                                {0: u'\U0010FFFF', 1: u'b', 2: u'c'}) ==
-                u"\U0010FFFFbc", 3)
+                (u"\U0010FFFFbc", 3))
 
     def test_escape_decode_errors(self):
         from _codecs import escape_decode as decode
@@ -537,8 +537,12 @@
         assert '\xff'.decode('utf-7', 'ignore') == ''
         assert '\x00'.decode('unicode-internal', 'ignore') == ''
 
-    def test_backslahreplace(self):
-        assert u'a\xac\u1234\u20ac\u8000'.encode('ascii', 'backslashreplace') 
== 'a\\xac\u1234\u20ac\u8000'
+    def test_backslashreplace(self):
+        sin = u"a\xac\u1234\u20ac\u8000\U0010ffff"
+        expected = "a\\xac\\u1234\\u20ac\\u8000\\U0010ffff"
+        assert sin.encode('ascii', 'backslashreplace') == expected
+        expected = "a\xac\\u1234\xa4\\u8000\\U0010ffff"
+        assert sin.encode("iso-8859-15", "backslashreplace") == expected
 
     def test_badhandler(self):
         import codecs
@@ -592,11 +596,11 @@
         def handler_unicodeinternal(exc):
             if not isinstance(exc, UnicodeDecodeError):
                 raise TypeError("don't know how to handle %r" % exc)
-            return (u"\x01", 1)
+            return (u"\x01", 4)
         codecs.register_error("test.hui", handler_unicodeinternal)
         res = "\x00\x00\x00\x00\x00".decode("unicode-internal", "test.hui")
         if sys.maxunicode > 65535:
-            assert res == u"\u0000\u0001\u0000"   # UCS4 build
+            assert res == u"\u0000\u0001"   # UCS4 build
         else:
             assert res == u"\x00\x00\x01\x00\x00" # UCS2 build
 
@@ -750,3 +754,31 @@
         assert _codecs.unicode_escape_decode(b) == (u'', 0)
         assert _codecs.raw_unicode_escape_decode(b) == (u'', 0)
         assert _codecs.unicode_internal_decode(b) == (u'', 0)
+
+    def test_xmlcharrefreplace(self):
+        r = u'\u1234\u0080\u2345\u0079\u00AB'.encode('latin1', 
'xmlcharrefreplace')
+        assert r == '&#4660;\x80&#9029;y\xab'
+        r = u'\u1234\u0080\u2345\u0079\u00AB'.encode('ascii', 
'xmlcharrefreplace')
+        assert r == '&#4660;&#128;&#9029;y&#171;'
+
+    def test_errorhandler_collection(self):
+        import _codecs
+        errors = []
+        def record_error(exc):
+            if not isinstance(exc, UnicodeEncodeError):
+                raise TypeError("don't know how to handle %r" % exc)
+            errors.append(exc.object[exc.start:exc.end])
+            return (u'', exc.end)
+        _codecs.register_error("test.record", record_error)
+
+        sin = u"\xac\u1234\u1234\u20ac\u8000"
+        assert sin.encode("ascii", "test.record") == ""
+        assert errors == [sin]
+
+        errors = []
+        assert sin.encode("latin-1", "test.record") == "\xac"
+        assert errors == [u'\u1234\u1234\u20ac\u8000']
+
+        errors = []
+        assert sin.encode("iso-8859-15", "test.record") == "\xac\xa4"
+        assert errors == [u'\u1234\u1234', u'\u8000']
diff --git a/pypy/module/test_lib_pypy/test_greenlet.py 
b/pypy/module/_continuation/test/test_greenlet.py
rename from pypy/module/test_lib_pypy/test_greenlet.py
rename to pypy/module/_continuation/test/test_greenlet.py
diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py
--- a/pypy/module/_io/interp_textio.py
+++ b/pypy/module/_io/interp_textio.py
@@ -353,6 +353,7 @@
         while scanned < limit:
             try:
                 ch = self.next_char()
+                scanned += 1
             except StopIteration:
                 return False
             if ch == u'\n':
@@ -413,6 +414,7 @@
     if not space.isinstance_w(w_decoded, space.w_unicode):
         msg = "decoder should return a string result, not '%T'"
         raise oefmt(space.w_TypeError, msg, w_decoded)
+    return w_decoded
 
 
 class W_TextIOWrapper(W_TextIOBase):
@@ -737,7 +739,7 @@
                     remnant = None
                     continue
 
-            if limit > 0:
+            if limit >= 0:
                 remaining = limit - builder.getlength()
                 assert remaining >= 0
             else:
@@ -939,12 +941,13 @@
 
             w_decoded = space.call_method(self.w_decoder, "decode",
                                           w_chunk, 
space.newbool(bool(cookie.need_eof)))
-            self.decoded.set(space, w_decoded)
+            w_decoded = check_decoded(space, w_decoded)
 
             # Skip chars_to_skip of the decoded characters
-            if len(self.decoded.text) < cookie.chars_to_skip:
+            if space.len_w(w_decoded) < cookie.chars_to_skip:
                 raise oefmt(space.w_IOError,
                             "can't restore logical file position")
+            self.decoded.set(space, w_decoded)
             self.decoded.pos = cookie.chars_to_skip
         else:
             self.snapshot = PositionSnapshot(cookie.dec_flags, "")
@@ -957,10 +960,8 @@
 
     def tell_w(self, space):
         self._check_closed(space)
-
         if not self.seekable:
             raise oefmt(space.w_IOError, "underlying stream is not seekable")
-
         if not self.telling:
             raise oefmt(space.w_IOError,
                         "telling position disabled by next() call")
@@ -1030,14 +1031,14 @@
                 # We didn't get enough decoded data; signal EOF to get more.
                 w_decoded = space.call_method(self.w_decoder, "decode",
                                               space.newbytes(""),
-                                              space.newint(1)) # final=1
+                                              space.newint(1))  # final=1
                 check_decoded(space, w_decoded)
-                chars_decoded += len(space.unicode_w(w_decoded))
+                chars_decoded += space.len_w(w_decoded)
                 cookie.need_eof = 1
 
                 if chars_decoded < chars_to_skip:
                     raise oefmt(space.w_IOError,
-                                "can't reconstruct logical file position")
+                        "can't reconstruct logical file position")
         finally:
             space.call_method(self.w_decoder, "setstate", w_saved_state)
 
diff --git a/pypy/module/_io/test/test_interp_textio.py 
b/pypy/module/_io/test/test_interp_textio.py
--- a/pypy/module/_io/test/test_interp_textio.py
+++ b/pypy/module/_io/test/test_interp_textio.py
@@ -1,40 +1,54 @@
 import pytest
 try:
-    from hypothesis import given, strategies as st, assume
+    from hypothesis import given, strategies as st
 except ImportError:
     pytest.skip("hypothesis required")
+import os
 from pypy.module._io.interp_bytesio import W_BytesIO
 from pypy.module._io.interp_textio import W_TextIOWrapper, DecodeBuffer
 
-LINESEP = ['', '\r', '\n', '\r\n']
+def translate_newlines(text):
+    text = text.replace(u'\r\n', u'\n')
+    text = text.replace(u'\r', u'\n')
+    return text.replace(u'\n', os.linesep)
 
 @st.composite
-def text_with_newlines(draw):
-    sep = draw(st.sampled_from(LINESEP))
-    lines = draw(st.lists(st.text(max_size=10), max_size=10))
-    return sep.join(lines)
+def st_readline(draw, st_nlines=st.integers(min_value=0, max_value=10)):
+    n_lines = draw(st_nlines)
+    fragments = []
+    limits = []
+    for _ in range(n_lines):
+        line = draw(st.text(st.characters(blacklist_characters=u'\r\n')))
+        fragments.append(line)
+        ending = draw(st.sampled_from([u'\n', u'\r', u'\r\n']))
+        fragments.append(ending)
+        limit = draw(st.integers(min_value=0, max_value=len(line) + 5))
+        limits.append(limit)
+        limits.append(-1)
+    return (u''.join(fragments), limits)
 
-@given(txt=text_with_newlines(),
-       mode=st.sampled_from(['\r', '\n', '\r\n', '']),
-       limit=st.integers(min_value=-1))
-def test_readline(space, txt, mode, limit):
-    assume(limit != 0)
+@given(data=st_readline(),
+       mode=st.sampled_from(['\r', '\n', '\r\n', '']))
+def test_readline(space, data, mode):
+    txt, limits = data
     w_stream = W_BytesIO(space)
     w_stream.descr_init(space, space.newbytes(txt.encode('utf-8')))
     w_textio = W_TextIOWrapper(space)
     w_textio.descr_init(
-        space, w_stream, encoding='utf-8',
+        space, w_stream,
+        encoding='utf-8', w_errors=space.newtext('surrogatepass'),
         w_newline=space.newtext(mode))
     lines = []
-    while True:
-        line = space.unicode_w(w_textio.readline_w(space, space.newint(limit)))
-        if limit > 0:
+    for limit in limits:
+        w_line = w_textio.readline_w(space, space.newint(limit))
+        line = space.unicode_w(w_line)
+        if limit >= 0:
             assert len(line) <= limit
         if line:
             lines.append(line)
-        else:
+        elif limit:
             break
-    assert u''.join(lines) == txt
+    assert txt.startswith(u''.join(lines))
 
 @given(st.text())
 def test_read_buffer(text):
diff --git a/pypy/module/_pypyjson/interp_decoder.py 
b/pypy/module/_pypyjson/interp_decoder.py
--- a/pypy/module/_pypyjson/interp_decoder.py
+++ b/pypy/module/_pypyjson/interp_decoder.py
@@ -71,7 +71,7 @@
         self.ll_chars = rffi.str2charp(s)
         self.end_ptr = lltype.malloc(rffi.CCHARPP.TO, 1, flavor='raw')
         self.pos = 0
-        self.cache = r_dict(slice_eq, slice_hash)
+        self.cache = r_dict(slice_eq, slice_hash, simple_hash_eq=True)
 
     def close(self):
         rffi.free_charp(self.ll_chars)
diff --git a/pypy/module/_pypyjson/interp_encoder.py 
b/pypy/module/_pypyjson/interp_encoder.py
--- a/pypy/module/_pypyjson/interp_encoder.py
+++ b/pypy/module/_pypyjson/interp_encoder.py
@@ -49,24 +49,24 @@
         first = 0
 
     for i in range(first, len(u)):
-        c = u[i]
-        if c <= u'~':
-            if c == u'"' or c == u'\\':
+        c = ord(u[i])
+        if c <= ord('~'):
+            if c == ord('"') or c == ord('\\'):
                 sb.append('\\')
-            elif c < u' ':
-                sb.append(ESCAPE_BEFORE_SPACE[ord(c)])
+            elif c < ord(' '):
+                sb.append(ESCAPE_BEFORE_SPACE[c])
                 continue
-            sb.append(chr(ord(c)))
+            sb.append(chr(c))
         else:
-            if c <= u'\uffff':
+            if c <= ord(u'\uffff'):
                 sb.append('\\u')
-                sb.append(HEX[ord(c) >> 12])
-                sb.append(HEX[(ord(c) >> 8) & 0x0f])
-                sb.append(HEX[(ord(c) >> 4) & 0x0f])
-                sb.append(HEX[ord(c) & 0x0f])
+                sb.append(HEX[c >> 12])
+                sb.append(HEX[(c >> 8) & 0x0f])
+                sb.append(HEX[(c >> 4) & 0x0f])
+                sb.append(HEX[c & 0x0f])
             else:
                 # surrogate pair
-                n = ord(c) - 0x10000
+                n = c - 0x10000
                 s1 = 0xd800 | ((n >> 10) & 0x3ff)
                 sb.append('\\ud')
                 sb.append(HEX[(s1 >> 8) & 0x0f])
diff --git a/pypy/module/_rawffi/alt/type_converter.py 
b/pypy/module/_rawffi/alt/type_converter.py
--- a/pypy/module/_rawffi/alt/type_converter.py
+++ b/pypy/module/_rawffi/alt/type_converter.py
@@ -128,7 +128,7 @@
         intval: lltype.Signed
         """
         self.error(w_ffitype, w_obj)
-        
+
     def handle_unichar(self, w_ffitype, w_obj, intval):
         """
         intval: lltype.Signed
@@ -174,7 +174,7 @@
     def handle_struct_rawffi(self, w_ffitype, w_structinstance):
         """
         This method should be killed as soon as we remove support for _rawffi 
structures
-        
+
         w_structinstance: W_StructureInstance
         """
         self.error(w_ffitype, w_structinstance)
@@ -349,7 +349,7 @@
     def get_struct_rawffi(self, w_ffitype, w_structdescr):
         """
         This should be killed as soon as we kill support for _rawffi structures
-        
+
         Return type: lltype.Unsigned
         (the address of the structure)
         """
diff --git a/pypy/module/_sre/interp_sre.py b/pypy/module/_sre/interp_sre.py
--- a/pypy/module/_sre/interp_sre.py
+++ b/pypy/module/_sre/interp_sre.py
@@ -481,11 +481,13 @@
 
     @unwrap_spec(w_groupnum=WrappedDefault(0))
     def start_w(self, w_groupnum):
-        return self.space.newint(self.do_span(w_groupnum)[0])
+        start, end = self.do_span(w_groupnum)
+        return self.space.newint(start)
 
     @unwrap_spec(w_groupnum=WrappedDefault(0))
     def end_w(self, w_groupnum):
-        return self.space.newint(self.do_span(w_groupnum)[1])
+        start, end = self.do_span(w_groupnum)
+        return self.space.newint(end)
 
     @unwrap_spec(w_groupnum=WrappedDefault(0))
     def span_w(self, w_groupnum):
diff --git a/pypy/module/_sre/test/test_app_sre.py 
b/pypy/module/_sre/test/test_app_sre.py
--- a/pypy/module/_sre/test/test_app_sre.py
+++ b/pypy/module/_sre/test/test_app_sre.py
@@ -87,6 +87,14 @@
         assert [("a", "l"), ("u", "s")] == re.findall("b(.)(.)", "abalbus")
         assert [("a", ""), ("s", "s")] == re.findall("b(a|(s))", "babs")
 
+    def test_findall_unicode(self):
+        import re
+        assert [u"\u1234"] == re.findall(u"\u1234", u"\u1000\u1234\u2000")
+        assert ["a", "u"] == re.findall("b(.)", "abalbus")
+        assert [("a", "l"), ("u", "s")] == re.findall("b(.)(.)", "abalbus")
+        assert [("a", ""), ("s", "s")] == re.findall("b(a|(s))", "babs")
+        assert [u"xyz"] == re.findall(u".*yz", u"xyz")
+
     def test_finditer(self):
         import re
         it = re.finditer("b(.)", "brabbel")
@@ -999,3 +1007,15 @@
         import re
         assert re.search(".+ab", "wowowowawoabwowo")
         assert None == re.search(".+ab", "wowowaowowo")
+
+
+class AppTestUnicodeExtra:
+    def test_string_attribute(self):
+        import re
+        match = re.search(u"\u1234", u"\u1233\u1234\u1235")
+        assert match.string == u"\u1233\u1234\u1235"
+
+    def test_match_start(self):
+        import re
+        match = re.search(u"\u1234", u"\u1233\u1234\u1235")
+        assert match.start() == 1
diff --git a/pypy/module/cpyext/pyerrors.py b/pypy/module/cpyext/pyerrors.py
--- a/pypy/module/cpyext/pyerrors.py
+++ b/pypy/module/cpyext/pyerrors.py
@@ -122,7 +122,7 @@
     error indicator."""
     raise oefmt(space.w_TypeError, "bad argument type for built-in operation")
 
-@cpython_api([], lltype.Void)
+@cpython_api([], lltype.Void, error=None)
 def PyErr_BadInternalCall(space):
     raise oefmt(space.w_SystemError, "Bad internal call!")
 
diff --git a/pypy/module/cpyext/test/test_codecs.py 
b/pypy/module/cpyext/test/test_codecs.py
--- a/pypy/module/cpyext/test/test_codecs.py
+++ b/pypy/module/cpyext/test/test_codecs.py
@@ -11,5 +11,5 @@
         w_encoded = space.call_method(w_encoder, 'encode', 
space.wrap(u'sp&#228;m'))
         w_decoder = PyCodec_IncrementalDecoder(space, utf8, None)
         w_decoded = space.call_method(w_decoder, 'decode', w_encoded)
-        assert space.unwrap(w_decoded) == u'sp&#228;m'
+        assert space.unicode_w(w_decoded) == u'sp&#228;m'
         rffi.free_charp(utf8)
diff --git a/pypy/module/cpyext/test/test_eval.py 
b/pypy/module/cpyext/test/test_eval.py
--- a/pypy/module/cpyext/test/test_eval.py
+++ b/pypy/module/cpyext/test/test_eval.py
@@ -131,7 +131,7 @@
         finally:
             rffi.free_charp(buf)
         w_a = space.getitem(w_globals, space.wrap("a"))
-        assert space.unwrap(w_a) == u'caf\xe9'
+        assert space.unicode_w(w_a) == u'caf\xe9'
         lltype.free(flags, flavor='raw')
 
     def test_run_file(self, space):
diff --git a/pypy/module/cpyext/test/test_object.py 
b/pypy/module/cpyext/test/test_object.py
--- a/pypy/module/cpyext/test/test_object.py
+++ b/pypy/module/cpyext/test/test_object.py
@@ -8,7 +8,7 @@
 from pypy.module.cpyext.object import (
     PyObject_IsTrue, PyObject_Not, PyObject_GetAttrString,
     PyObject_DelAttrString, PyObject_GetAttr, PyObject_DelAttr,
-    PyObject_GetItem, 
+    PyObject_GetItem,
     PyObject_IsInstance, PyObject_IsSubclass, PyObject_AsFileDescriptor,
     PyObject_Hash, PyObject_Cmp, PyObject_Unicode
 )
@@ -209,9 +209,9 @@
                 PyObject_Cmp(space, w(u"\xe9"), w("\xe9"), ptr)
 
     def test_unicode(self, space, api):
-        assert space.unwrap(api.PyObject_Unicode(None)) == u"<NULL>"
-        assert space.unwrap(api.PyObject_Unicode(space.wrap([]))) == u"[]"
-        assert space.unwrap(api.PyObject_Unicode(space.wrap("e"))) == u"e"
+        assert space.unicode_w(api.PyObject_Unicode(None)) == u"<NULL>"
+        assert space.unicode_w(api.PyObject_Unicode(space.wrap([]))) == u"[]"
+        assert space.unicode_w(api.PyObject_Unicode(space.wrap("e"))) == u"e"
         with raises_w(space, UnicodeDecodeError):
             PyObject_Unicode(space, space.wrap("\xe9"))
 
@@ -562,7 +562,7 @@
                 PyObject *a = PyTuple_GetItem(args, 0);
                 PyObject *b = PyTuple_GetItem(args, 1);
                 int res = PyObject_RichCompareBool(a, b, Py_EQ);
-                return PyLong_FromLong(res);  
+                return PyLong_FromLong(res);
                 """),])
         a = float('nan')
         b = float('nan')
diff --git a/pypy/module/cpyext/test/test_pyerrors.py 
b/pypy/module/cpyext/test/test_pyerrors.py
--- a/pypy/module/cpyext/test/test_pyerrors.py
+++ b/pypy/module/cpyext/test/test_pyerrors.py
@@ -425,3 +425,15 @@
             assert orig_exc_info == reset_sys_exc_info
             assert new_exc_info == (new_exc.__class__, new_exc, None)
             assert new_exc_info == new_sys_exc_info
+
+    def test_PyErr_BadInternalCall(self):
+        # NB. it only seemed to fail when run with '-s'... but I think
+        # that it always printed stuff to stderr
+        module = self.import_extension('foo', [
+            ("oops", "METH_NOARGS",
+             r'''
+             PyErr_BadInternalCall();
+             return NULL;
+             '''),
+            ])
+        raises(SystemError, module.oops)
diff --git a/pypy/module/cpyext/test/test_unicodeobject.py 
b/pypy/module/cpyext/test/test_unicodeobject.py
--- a/pypy/module/cpyext/test/test_unicodeobject.py
+++ b/pypy/module/cpyext/test/test_unicodeobject.py
@@ -178,7 +178,7 @@
         array = rffi.cast(rffi.CWCHARP, PyUnicode_AS_DATA(space, word))
         array2 = PyUnicode_AS_UNICODE(space, word)
         array3 = PyUnicode_AsUnicode(space, word)
-        for (i, char) in enumerate(space.unwrap(word)):
+        for (i, char) in enumerate(space.unicode_w(word)):
             assert array[i] == char
             assert array2[i] == char
             assert array3[i] == char
@@ -216,12 +216,12 @@
     def test_fromstring(self, space):
         s = rffi.str2charp(u'sp\x09m'.encode("utf-8"))
         w_res = PyUnicode_FromString(space, s)
-        assert space.unwrap(w_res) == u'sp\x09m'
+        assert space.unicode_w(w_res) == u'sp\x09m'
 
         res = PyUnicode_FromStringAndSize(space, s, 4)
         w_res = from_ref(space, res)
         Py_DecRef(space, res)
-        assert space.unwrap(w_res) == u'sp\x09m'
+        assert space.unicode_w(w_res) == u'sp\x09m'
         rffi.free_charp(s)
 
     def test_unicode_resize(self, space):
@@ -256,17 +256,17 @@
         u = rffi.str2charp(u'sp\x134m'.encode("utf-8"))
         w_u = PyUnicode_DecodeUTF8(space, u, 5, None)
         assert space.type(w_u) is space.w_unicode
-        assert space.unwrap(w_u) == u'sp\x134m'
+        assert space.unicode_w(w_u) == u'sp\x134m'
 
         w_u = PyUnicode_DecodeUTF8(space, u, 2, None)
         assert space.type(w_u) is space.w_unicode
-        assert space.unwrap(w_u) == 'sp'
+        assert space.unicode_w(w_u) == 'sp'
         rffi.free_charp(u)
 
     def test_encode_utf8(self, space):
         u = rffi.unicode2wcharp(u'sp\x09m')
         w_s = PyUnicode_EncodeUTF8(space, u, 4, None)
-        assert space.unwrap(w_s) == u'sp\x09m'.encode('utf-8')
+        assert space.unicode_w(w_s) == u'sp\x09m'.encode('utf-8')
         rffi.free_wcharp(u)
 
     def test_encode_decimal(self, space):
@@ -364,18 +364,18 @@
     def test_fromobject(self, space):
         w_u = space.wrap(u'a')
         assert PyUnicode_FromObject(space, w_u) is w_u
-        assert space.unwrap(
+        assert space.unicode_w(
             PyUnicode_FromObject(space, space.wrap('test'))) == 'test'
 
     def test_decode(self, space):
         b_text = rffi.str2charp('caf\x82xx')
         b_encoding = rffi.str2charp('cp437')
-        assert space.unwrap(
+        assert space.unicode_w(
             PyUnicode_Decode(space, b_text, 4, b_encoding, None)) == u'caf\xe9'
 
         w_text = PyUnicode_FromEncodedObject(space, space.wrap("test"), 
b_encoding, None)
         assert space.isinstance_w(w_text, space.w_unicode)
-        assert space.unwrap(w_text) == "test"
+        assert space.unicode_w(w_text) == "test"
 
         with raises_w(space, TypeError):
             PyUnicode_FromEncodedObject(space, space.wrap(u"test"),
@@ -391,7 +391,8 @@
         u_text = u'abcdefg'
         s_text = space.str_w(PyUnicode_AsEncodedString(space, 
space.wrap(u_text), null_charp, null_charp))
         b_text = rffi.str2charp(s_text)
-        assert space.unwrap(PyUnicode_Decode(space, b_text, len(s_text), 
null_charp, null_charp)) == u_text
+        assert space.unicode_w(PyUnicode_Decode(
+            space, b_text, len(s_text), null_charp, null_charp)) == u_text
         with raises_w(space, TypeError):
             PyUnicode_FromEncodedObject(
                 space, space.wrap(u_text), null_charp, None)
@@ -508,7 +509,7 @@
 
     def test_concat(self, space):
         w_res = PyUnicode_Concat(space, space.wrap(u'a'), space.wrap(u'b'))
-        assert space.unwrap(w_res) == u'ab'
+        assert space.unicode_w(w_res) == u'ab'
 
     def test_copy(self, space):
         w_x = space.wrap(u"abcd\u0660")
@@ -579,29 +580,30 @@
         w_format = space.wrap(u'hi %s')
         w_args = space.wrap((u'test',))
         w_formated = PyUnicode_Format(space, w_format, w_args)
-        assert space.unwrap(w_formated) == space.unwrap(space.mod(w_format, 
w_args))
+        assert (space.unicode_w(w_formated) ==
+                space.unicode_w(space.mod(w_format, w_args)))
 
     def test_join(self, space):
         w_sep = space.wrap(u'<sep>')
         w_seq = space.wrap([u'a', u'b'])
         w_joined = PyUnicode_Join(space, w_sep, w_seq)
-        assert space.unwrap(w_joined) == u'a<sep>b'
+        assert space.unicode_w(w_joined) == u'a<sep>b'
 
     def test_fromordinal(self, space):
         w_char = PyUnicode_FromOrdinal(space, 65)
-        assert space.unwrap(w_char) == u'A'
+        assert space.unicode_w(w_char) == u'A'
         w_char = PyUnicode_FromOrdinal(space, 0)
-        assert space.unwrap(w_char) == u'\0'
+        assert space.unicode_w(w_char) == u'\0'
         w_char = PyUnicode_FromOrdinal(space, 0xFFFF)
-        assert space.unwrap(w_char) == u'\uFFFF'
+        assert space.unicode_w(w_char) == u'\uFFFF'
 
     def test_replace(self, space):
         w_str = space.wrap(u"abababab")
         w_substr = space.wrap(u"a")
         w_replstr = space.wrap(u"z")
-        assert u"zbzbabab" == space.unwrap(
+        assert u"zbzbabab" == space.unicode_w(
             PyUnicode_Replace(space, w_str, w_substr, w_replstr, 2))
-        assert u"zbzbzbzb" == space.unwrap(
+        assert u"zbzbzbzb" == space.unicode_w(
             PyUnicode_Replace(space, w_str, w_substr, w_replstr, -1))
 
     def test_tailmatch(self, space):
diff --git a/pypy/module/cpyext/unicodeobject.py 
b/pypy/module/cpyext/unicodeobject.py
--- a/pypy/module/cpyext/unicodeobject.py
+++ b/pypy/module/cpyext/unicodeobject.py
@@ -12,6 +12,7 @@
 from pypy.module.cpyext.bytesobject import PyString_Check
 from pypy.module.sys.interp_encoding import setdefaultencoding
 from pypy.module._codecs.interp_codecs import CodecState
+from pypy.interpreter import unicodehelper
 from pypy.objspace.std import unicodeobject
 from rpython.rlib import rstring, runicode
 from rpython.tool.sourcetools import func_renamer
@@ -620,7 +621,7 @@
     else:
         errors = None
 
-    result, length, byteorder = runicode.str_decode_utf_32_helper(
+    result, length, byteorder = unicodehelper.str_decode_utf_32_helper(
         string, size, errors,
         True, # final ? false for multiple passes?
         None, # errorhandler
diff --git a/pypy/module/posix/test/test_posix2.py 
b/pypy/module/posix/test/test_posix2.py
--- a/pypy/module/posix/test/test_posix2.py
+++ b/pypy/module/posix/test/test_posix2.py
@@ -31,9 +31,15 @@
     pdir.join('file2').write("test2")
     pdir.join('another_longer_file_name').write("test3")
     mod.pdir = pdir
-    unicode_dir = udir.ensure('fi\xc5\x9fier.txt', dir=True)
+    if sys.platform == 'darwin':
+        # see issue https://bugs.python.org/issue31380
+        unicode_dir = udir.ensure('fixc5x9fier.txt', dir=True)
+        file_name = 'cafxe9'
+    else:
+        unicode_dir = udir.ensure('fi\xc5\x9fier.txt', dir=True)
+        file_name = 'caf\xe9'
     unicode_dir.join('somefile').write('who cares?')
-    unicode_dir.join('caf\xe9').write('who knows?')
+    unicode_dir.join(file_name).write('who knows?')
     mod.unicode_dir = unicode_dir
 
     # in applevel tests, os.stat uses the CPython os.stat.
diff --git a/pypy/module/time/interp_time.py b/pypy/module/time/interp_time.py
--- a/pypy/module/time/interp_time.py
+++ b/pypy/module/time/interp_time.py
@@ -310,12 +310,19 @@
     errno = rposix.get_saved_errno()
     return os.strerror(errno)
 
+def _check_sleep_arg(space, secs):
+    from rpython.rlib.rfloat import isinf, isnan
+    if secs < 0:
+        raise oefmt(space.w_IOError,
+                    "Invalid argument: negative time in sleep")
+    if isinf(secs) or isnan(secs):
+        raise oefmt(space.w_IOError,
+                    "Invalid argument: inf or nan")
+
 if sys.platform != 'win32':
     @unwrap_spec(secs=float)
     def sleep(space, secs):
-        if secs < 0:
-            raise oefmt(space.w_IOError,
-                        "Invalid argument: negative time in sleep")
+        _check_sleep_arg(space, secs)
         rtime.sleep(secs)
 else:
     from rpython.rlib import rwin32
@@ -336,9 +343,7 @@
                                    OSError(EINTR, "sleep() interrupted"))
     @unwrap_spec(secs=float)
     def sleep(space, secs):
-        if secs < 0:
-            raise oefmt(space.w_IOError,
-                        "Invalid argument: negative time in sleep")
+        _check_sleep_arg(space, secs)
         # as decreed by Guido, only the main thread can be
         # interrupted.
         main_thread = space.fromcache(State).main_thread
diff --git a/pypy/module/time/test/test_time.py 
b/pypy/module/time/test/test_time.py
--- a/pypy/module/time/test/test_time.py
+++ b/pypy/module/time/test/test_time.py
@@ -19,6 +19,8 @@
         raises(TypeError, time.sleep, "foo")
         time.sleep(0.12345)
         raises(IOError, time.sleep, -1.0)
+        raises(IOError, time.sleep, float('nan'))
+        raises(IOError, time.sleep, float('inf'))
 
     def test_clock(self):
         import time
diff --git a/pypy/module/unicodedata/interp_ucd.py 
b/pypy/module/unicodedata/interp_ucd.py
--- a/pypy/module/unicodedata/interp_ucd.py
+++ b/pypy/module/unicodedata/interp_ucd.py
@@ -259,10 +259,10 @@
                 result[0] = ch
 
         if not composed: # If decomposed normalization we are done
-            return space.newunicode(u''.join([unichr(i) for i in result[:j]]))
+            return self.build(space, result, stop=j)
 
         if j <= 1:
-            return space.newunicode(u''.join([unichr(i) for i in result[:j]]))
+            return self.build(space, result, stop=j)
 
         current = result[0]
         starter_pos = 0
@@ -310,7 +310,10 @@
 
         result[starter_pos] = current
 
-        return space.newunicode(u''.join([unichr(i) for i in 
result[:next_insert]]))
+        return self.build(space, result, stop=next_insert)
+
+    def build(self, space, r, stop):
+        return space.newunicode(u''.join([unichr(i) for i in r[:stop]]))
 
 
 methods = {}
diff --git a/pypy/module/unicodedata/test/test_hyp.py 
b/pypy/module/unicodedata/test/test_hyp.py
--- a/pypy/module/unicodedata/test/test_hyp.py
+++ b/pypy/module/unicodedata/test/test_hyp.py
@@ -10,7 +10,7 @@
     def normalize(s):
         w_s = space.newunicode(s)
         w_res = ucd.normalize(space, NF_code, w_s)
-        return space.unwrap(w_res)
+        return space.unicode_w(w_res)
     return normalize
 
 all_forms = ['NFC', 'NFD', 'NFKC', 'NFKD']
diff --git a/pypy/objspace/std/test/test_unicodeobject.py 
b/pypy/objspace/std/test/test_unicodeobject.py
--- a/pypy/objspace/std/test/test_unicodeobject.py
+++ b/pypy/objspace/std/test/test_unicodeobject.py
@@ -135,6 +135,11 @@
         check(u'a' + 'b', u'ab')
         check('a' + u'b', u'ab')
 
+    def test_getitem(self):
+        assert u'abc'[2] == 'c'
+        raises(IndexError, u'abc'.__getitem__, 15)
+        assert u'g\u0105\u015b\u0107'[2] == u'\u015b'
+
     def test_join(self):
         def check(a, b):
             assert a == b
@@ -171,6 +176,8 @@
         assert u'\n\n'.splitlines() == [u'', u'']
         assert u'a\nb\nc'.splitlines(1) == [u'a\n', u'b\n', u'c']
         assert u'\na\nb\n'.splitlines(1) == [u'\n', u'a\n', u'b\n']
+        assert ((u'a' + '\xc2\x85'.decode('utf8') + u'b\n').splitlines() ==
+                ['a', 'b'])
 
     def test_zfill(self):
         assert u'123'.zfill(2) == u'123'
@@ -217,6 +224,7 @@
         raises(ValueError, u'abc'.split, u'')
         raises(ValueError, 'abc'.split, u'')
         assert u'   a b c d'.split(None, 0) == [u'a b c d']
+        assert u'a\nb\u1680c'.split() == [u'a', u'b', u'c']
 
     def test_rsplit(self):
         assert u"".rsplit() == []
@@ -246,6 +254,7 @@
         raises(ValueError, 'abc'.rsplit, u'')
         assert u'  a b c  '.rsplit(None, 0) == [u'  a b c']
         assert u''.rsplit('aaa') == [u'']
+        assert u'a\nb\u1680c'.rsplit() == [u'a', u'b', u'c']
 
     def test_split_rsplit_str_unicode(self):
         x = 'abc'.split(u'b')
@@ -291,6 +300,8 @@
         assert u"bROWN fOX".title() == u"Brown Fox"
         assert u"Brown Fox".title() == u"Brown Fox"
         assert u"bro!wn fox".title() == u"Bro!Wn Fox"
+        assert u"brow\u4321n fox".title() == u"Brow\u4321N Fox"
+        assert u'\ud800'.title() == u'\ud800'
 
     def test_istitle(self):
         assert u"".istitle() == False
@@ -315,6 +326,18 @@
         assert not u'\u01c5abc'.islower()
         assert not u'\u01c5ABC'.isupper()
 
+    def test_lower_upper(self):
+        assert u'a'.lower() == u'a'
+        assert u'A'.lower() == u'a'
+        assert u'\u0105'.lower() == u'\u0105'
+        assert u'\u0104'.lower() == u'\u0105'
+        assert u'\ud800'.lower() == u'\ud800'
+        assert u'a'.upper() == u'A'
+        assert u'A'.upper() == u'A'
+        assert u'\u0105'.upper() == u'\u0104'
+        assert u'\u0104'.upper() == u'\u0104'
+        assert u'\ud800'.upper() == u'\ud800'
+
     def test_capitalize(self):
         assert u"brown fox".capitalize() == u"Brown fox"
         assert u' hello '.capitalize() == u' hello '
@@ -336,6 +359,8 @@
         # check with Ll chars with no upper - nothing changes here
         assert (u'\u019b\u1d00\u1d86\u0221\u1fb7'.capitalize() ==
                 u'\u019b\u1d00\u1d86\u0221\u1fb7')
+        assert u'\ud800'.capitalize() == u'\ud800'
+        assert u'xx\ud800'.capitalize() == u'Xx\ud800'
 
     def test_rjust(self):
         s = u"abc"
@@ -376,6 +401,16 @@
         assert u'one!two!three!'.replace('x', '@') == u'one!two!three!'
         assert u'one!two!three!'.replace(u'x', '@', 2) == u'one!two!three!'
         assert u'abc'.replace('', u'-') == u'-a-b-c-'
+        assert u'\u1234'.replace(u'', '-') == u'-\u1234-'
+        assert u'\u0234\u5678'.replace('', u'-') == u'-\u0234-\u5678-'
+        assert u'\u0234\u5678'.replace('', u'-', 0) == u'\u0234\u5678'
+        assert u'\u0234\u5678'.replace('', u'-', 1) == u'-\u0234\u5678'
+        assert u'\u0234\u5678'.replace('', u'-', 2) == u'-\u0234-\u5678'
+        assert u'\u0234\u5678'.replace('', u'-', 3) == u'-\u0234-\u5678-'
+        assert u'\u0234\u5678'.replace('', u'-', 4) == u'-\u0234-\u5678-'
+        assert u'\u0234\u5678'.replace('', u'-', 700) == u'-\u0234-\u5678-'
+        assert u'\u0234\u5678'.replace('', u'-', -1) == u'-\u0234-\u5678-'
+        assert u'\u0234\u5678'.replace('', u'-', -42) == u'-\u0234-\u5678-'
         assert u'abc'.replace(u'', u'-', 3) == u'-a-b-c'
         assert u'abc'.replace('', '-', 0) == u'abc'
         assert u''.replace(u'', '') == u''
@@ -479,6 +514,9 @@
         assert u''.startswith(u'a') is False
         assert u'x'.startswith(u'xx') is False
         assert u'y'.startswith(u'xx') is False
+        assert u'\u1234\u5678\u4321'.startswith(u'\u1234') is True
+        assert u'\u1234\u5678\u4321'.startswith(u'\u1234\u4321') is False
+        assert u'\u1234'.startswith(u'', 1, 0) is True
 
     def test_startswith_more(self):
         assert u'ab'.startswith(u'a', 0) is True
@@ -589,7 +627,7 @@
         raises(TypeError, u'hello'.translate)
         raises(TypeError, u'abababc'.translate, {ord('a'):''})
 
-    def test_unicode_form_encoded_object(self):
+    def test_unicode_from_encoded_object(self):
         assert unicode('x', 'utf-8') == u'x'
         assert unicode('x', 'utf-8', 'strict') == u'x'
 
@@ -634,6 +672,8 @@
         assert u'\u20ac'.encode('utf-8') == '\xe2\x82\xac'
         assert u'\ud800\udc02'.encode('utf-8') == '\xf0\x90\x80\x82'
         assert u'\ud84d\udc56'.encode('utf-8') == '\xf0\xa3\x91\x96'
+        assert u'\ud800\udc02'.encode('uTf-8') == '\xf0\x90\x80\x82'
+        assert u'\ud84d\udc56'.encode('Utf8') == '\xf0\xa3\x91\x96'
         assert u'\ud800'.encode('utf-8') == '\xed\xa0\x80'
         assert u'\udc00'.encode('utf-8') == '\xed\xb0\x80'
         assert (u'\ud800\udc02'*1000).encode('utf-8') == 
'\xf0\x90\x80\x82'*1000
@@ -745,6 +785,7 @@
     def test_index(self):
         assert u"rrarrrrrrrrra".index(u'a', 4, None) == 12
         assert u"rrarrrrrrrrra".index(u'a', None, 6) == 2
+        assert u"\u1234\u4321\u5678".index(u'\u5678', 1) == 2
 
     def test_rindex(self):
         from sys import maxint
@@ -754,6 +795,7 @@
         assert u'abcdefghiabc'.rindex(u'abc', 0, -1) == 0
         assert u'abcdefghiabc'.rindex(u'abc', -4*maxint, 4*maxint) == 9
         assert u'rrarrrrrrrrra'.rindex(u'a', 4, None) == 12
+        assert u"\u1234\u5678".rindex(u'\u5678') == 1
 
         raises(ValueError, u'abcdefghiabc'.rindex, u'hib')
         raises(ValueError, u'defghiabc'.rindex, u'def', 1)
@@ -768,12 +810,15 @@
         assert u'abcdefghiabc'.rfind(u'') == 12
         assert u'abcdefghiabc'.rfind(u'abcd') == 0
         assert u'abcdefghiabc'.rfind(u'abcz') == -1
+        assert u"\u1234\u5678".rfind(u'\u5678') == 1
 
     def test_rfind_corner_case(self):
         assert u'abc'.rfind('', 4) == -1
 
     def test_find_index_str_unicode(self):
-        assert 'abcdefghiabc'.find(u'bc') == 1
+        assert u'abcdefghiabc'.find(u'bc') == 1
+        assert u'ab\u0105b\u0107'.find('b', 2) == 3
+        assert u'ab\u0105b\u0107'.find('b', 0, 1) == -1
         assert 'abcdefghiabc'.rfind(u'abc') == 9
         raises(UnicodeDecodeError, '\x80'.find, u'')
         raises(UnicodeDecodeError, '\x80'.rfind, u'')
@@ -781,6 +826,7 @@
         assert 'abcdefghiabc'.rindex(u'abc') == 9
         raises(UnicodeDecodeError, '\x80'.index, u'')
         raises(UnicodeDecodeError, '\x80'.rindex, u'')
+        assert u"\u1234\u5678".find(u'\u5678') == 1
 
     def test_count(self):
         assert u"".count(u"x") ==0
@@ -807,6 +853,7 @@
 
     def test_swapcase(self):
         assert u'\xe4\xc4\xdf'.swapcase() == u'\xc4\xe4\xdf'
+        assert u'\ud800'.swapcase() == u'\ud800'
 
     def test_buffer(self):
         buf = buffer(u'XY')
@@ -878,16 +925,31 @@
 
     def test_getslice(self):
         assert u'123456'.__getslice__(1, 5) == u'2345'
-        s = u"abc"
-        assert s[:] == "abc"
-        assert s[1:] == "bc"
-        assert s[:2] == "ab"
-        assert s[1:2] == "b"
-        assert s[-2:] == "bc"
-        assert s[:-1] == "ab"
-        assert s[-2:2] == "b"
-        assert s[1:-1] == "b"
-        assert s[-2:-1] == "b"
+        s = u"\u0105b\u0107"
+        assert s[:] == u"\u0105b\u0107"
+        assert s[1:] == u"b\u0107"
+        assert s[:2] == u"\u0105b"
+        assert s[1:2] == u"b"
+        assert s[-2:] == u"b\u0107"
+        assert s[:-1] == u"\u0105b"
+        assert s[-2:2] == u"b"
+        assert s[1:-1] == u"b"
+        assert s[-2:-1] == u"b"
+
+    def test_getitem_slice(self):
+        assert u'123456'.__getitem__(slice(1, 5)) == u'2345'
+        s = u"\u0105b\u0107"
+        assert s[slice(3)] == u"\u0105b\u0107"
+        assert s[slice(1, 3)] == u"b\u0107"
+        assert s[slice(2)] == u"\u0105b"
+        assert s[slice(1,2)] == u"b"
+        assert s[slice(-2,3)] == u"b\u0107"
+        assert s[slice(-1)] == u"\u0105b"
+        assert s[slice(-2,2)] == u"b"
+        assert s[slice(1,-1)] == u"b"
+        assert s[slice(-2,-1)] == u"b"
+        assert u"abcde"[::2] == u"ace"
+        assert u"\u0105\u0106\u0107abcd"[::2] == u"\u0105\u0107bd"
 
     def test_no_len_on_str_iter(self):
         iterable = u"hello"
diff --git a/pypy/tool/release/force-builds.py 
b/pypy/tool/release/force-builds.py
--- a/pypy/tool/release/force-builds.py
+++ b/pypy/tool/release/force-builds.py
@@ -29,7 +29,6 @@
     'pypy-c-jit-macosx-x86-64',
     'pypy-c-jit-win-x86-32',
     'pypy-c-jit-linux-s390x',
-    'build-pypy-c-jit-linux-armhf-raring',
     'build-pypy-c-jit-linux-armhf-raspbian',
     'build-pypy-c-jit-linux-armel',
 ]
diff --git a/pypy/tool/release/repackage.sh b/pypy/tool/release/repackage.sh
--- a/pypy/tool/release/repackage.sh
+++ b/pypy/tool/release/repackage.sh
@@ -23,7 +23,7 @@
 
 # Download latest builds from the buildmaster, rename the top
 # level directory, and repackage ready to be uploaded to bitbucket
-for plat in linux linux64 linux-armhf-raspbian linux-armhf-raring linux-armel 
osx64 s390x
+for plat in linux linux64 linux-armhf-raspbian linux-armel osx64 s390x
   do
     echo downloading package for $plat
     if wget -q --show-progress 
http://buildbot.pypy.org/nightly/$branchname/pypy-c-jit-latest-$plat.tar.bz2
diff --git a/rpython/annotator/bookkeeper.py b/rpython/annotator/bookkeeper.py
--- a/rpython/annotator/bookkeeper.py
+++ b/rpython/annotator/bookkeeper.py
@@ -194,13 +194,14 @@
             listdef.generalize_range_step(flags['range_step'])
         return SomeList(listdef)
 
-    def getdictdef(self, is_r_dict=False, force_non_null=False):
+    def getdictdef(self, is_r_dict=False, force_non_null=False, 
simple_hash_eq=False):
         """Get the DictDef associated with the current position."""
         try:
             dictdef = self.dictdefs[self.position_key]
         except KeyError:
             dictdef = DictDef(self, is_r_dict=is_r_dict,
-                              force_non_null=force_non_null)
+                              force_non_null=force_non_null,
+                              simple_hash_eq=simple_hash_eq)
             self.dictdefs[self.position_key] = dictdef
         return dictdef
 
diff --git a/rpython/annotator/builtin.py b/rpython/annotator/builtin.py
--- a/rpython/annotator/builtin.py
+++ b/rpython/annotator/builtin.py
@@ -237,22 +237,30 @@
     return SomeInstance(clsdef)
 
 @analyzer_for(rpython.rlib.objectmodel.r_dict)
-def robjmodel_r_dict(s_eqfn, s_hashfn, s_force_non_null=None):
+def robjmodel_r_dict(s_eqfn, s_hashfn, s_force_non_null=None, 
s_simple_hash_eq=None):
+    return _r_dict_helper(SomeDict, s_eqfn, s_hashfn, s_force_non_null, 
s_simple_hash_eq)
+
+@analyzer_for(rpython.rlib.objectmodel.r_ordereddict)
+def robjmodel_r_ordereddict(s_eqfn, s_hashfn, s_force_non_null=None, 
s_simple_hash_eq=None):
+    return _r_dict_helper(SomeOrderedDict, s_eqfn, s_hashfn,
+                          s_force_non_null, s_simple_hash_eq)
+
+def _r_dict_helper(cls, s_eqfn, s_hashfn, s_force_non_null, s_simple_hash_eq):
     if s_force_non_null is None:
         force_non_null = False
     else:
         assert s_force_non_null.is_constant()
         force_non_null = s_force_non_null.const
+    if s_simple_hash_eq is None:
+        simple_hash_eq = False
+    else:
+        assert s_simple_hash_eq.is_constant()
+        simple_hash_eq = s_simple_hash_eq.const
     dictdef = getbookkeeper().getdictdef(is_r_dict=True,
-                                         force_non_null=force_non_null)
+                                         force_non_null=force_non_null,
+                                         simple_hash_eq=simple_hash_eq)
     dictdef.dictkey.update_rdict_annotations(s_eqfn, s_hashfn)
-    return SomeDict(dictdef)
-
-@analyzer_for(rpython.rlib.objectmodel.r_ordereddict)
-def robjmodel_r_ordereddict(s_eqfn, s_hashfn):
-    dictdef = getbookkeeper().getdictdef(is_r_dict=True)
-    dictdef.dictkey.update_rdict_annotations(s_eqfn, s_hashfn)
-    return SomeOrderedDict(dictdef)
+    return cls(dictdef)
 
 @analyzer_for(rpython.rlib.objectmodel.hlinvoke)
 def robjmodel_hlinvoke(s_repr, s_llcallable, *args_s):
diff --git a/rpython/annotator/dictdef.py b/rpython/annotator/dictdef.py
--- a/rpython/annotator/dictdef.py
+++ b/rpython/annotator/dictdef.py
@@ -81,12 +81,14 @@
     def __init__(self, bookkeeper, s_key = s_ImpossibleValue,
                                  s_value = s_ImpossibleValue,
                                is_r_dict = False,
-                           force_non_null = False):
+                           force_non_null = False,
+                           simple_hash_eq = False):
         self.dictkey = DictKey(bookkeeper, s_key, is_r_dict)
         self.dictkey.itemof[self] = True
         self.dictvalue = DictValue(bookkeeper, s_value)
         self.dictvalue.itemof[self] = True
         self.force_non_null = force_non_null
+        self.simple_hash_eq = simple_hash_eq
 
     def read_key(self, position_key):
         self.dictkey.read_locations.add(position_key)
diff --git a/rpython/jit/metainterp/optimizeopt/intbounds.py 
b/rpython/jit/metainterp/optimizeopt/intbounds.py
--- a/rpython/jit/metainterp/optimizeopt/intbounds.py
+++ b/rpython/jit/metainterp/optimizeopt/intbounds.py
@@ -25,19 +25,6 @@
         return (1 << ((byte_size << 3) - 1)) - 1
 
 
-IS_64_BIT = sys.maxint > 2**32
-
-def next_pow2_m1(n):
-    """Calculate next power of 2 greater than n minus one."""
-    n |= n >> 1
-    n |= n >> 2
-    n |= n >> 4
-    n |= n >> 8
-    n |= n >> 16
-    if IS_64_BIT:
-        n |= n >> 32
-    return n
-
 
 class OptIntBounds(Optimization):
     """Keeps track of the bounds placed on integers by guards and remove
@@ -50,7 +37,7 @@
         return dispatch_postprocess(self, op)
 
     def propagate_bounds_backward(self, box):
-        # FIXME: This takes care of the instruction where box is the reuslt
+        # FIXME: This takes care of the instruction where box is the result
         #        but the bounds produced by all instructions where box is
         #        an argument might also be tighten
         b = self.getintbound(box)
@@ -91,14 +78,8 @@
         b1 = self.getintbound(v1)
         v2 = self.get_box_replacement(op.getarg(1))
         b2 = self.getintbound(v2)
-        if b1.known_ge(IntBound(0, 0)) and \
-           b2.known_ge(IntBound(0, 0)):
-            r = self.getintbound(op)
-            if b1.has_upper and b2.has_upper:
-                mostsignificant = b1.upper | b2.upper
-                r.intersect(IntBound(0, next_pow2_m1(mostsignificant)))
-            else:
-                r.make_ge(IntBound(0, 0))
+        b = b1.or_bound(b2)
+        self.getintbound(op).intersect(b)
 
     optimize_INT_OR = optimize_INT_OR_or_XOR
     optimize_INT_XOR = optimize_INT_OR_or_XOR
@@ -112,15 +93,8 @@
     def postprocess_INT_AND(self, op):
         b1 = self.getintbound(op.getarg(0))
         b2 = self.getintbound(op.getarg(1))
-        r = self.getintbound(op)
-        pos1 = b1.known_ge(IntBound(0, 0))
-        pos2 = b2.known_ge(IntBound(0, 0))
-        if pos1 or pos2:
-            r.make_ge(IntBound(0, 0))
-        if pos1:
-            r.make_le(b1)
-        if pos2:
-            r.make_le(b2)
+        b = b1.and_bound(b2)
+        self.getintbound(op).intersect(b)
 
     def optimize_INT_SUB(self, op):
         return self.emit(op)
@@ -211,16 +185,10 @@
         r.intersect(b1.py_div_bound(b2))
 
     def post_call_INT_PY_MOD(self, op):
+        b1 = self.getintbound(op.getarg(1))
         b2 = self.getintbound(op.getarg(2))
-        if b2.is_constant():
-            val = b2.getint()
-            r = self.getintbound(op)
-            if val >= 0:        # with Python's modulo:  0 <= (x % pos) < pos
-                r.make_ge(IntBound(0, 0))
-                r.make_lt(IntBound(val, val))
-            else:               # with Python's modulo:  neg < (x % neg) <= 0
-                r.make_gt(IntBound(val, val))
-                r.make_le(IntBound(0, 0))
+        r = self.getintbound(op)
+        r.intersect(b1.mod_bound(b2))
 
     def optimize_INT_LSHIFT(self, op):
         return self.emit(op)
@@ -436,7 +404,7 @@
 
     def optimize_INT_FORCE_GE_ZERO(self, op):
         b = self.getintbound(op.getarg(0))
-        if b.known_ge(IntBound(0, 0)):
+        if b.known_nonnegative():
             self.make_equal_to(op, op.getarg(0))
         else:
             return self.emit(op)
@@ -647,7 +615,7 @@
         if r.is_constant():
             if r.getint() == valnonzero:
                 b1 = self.getintbound(op.getarg(0))
-                if b1.known_ge(IntBound(0, 0)):
+                if b1.known_nonnegative():
                     b1.make_gt(IntBound(0, 0))
                     self.propagate_bounds_backward(op.getarg(0))
             elif r.getint() == valzero:
diff --git a/rpython/jit/metainterp/optimizeopt/intutils.py 
b/rpython/jit/metainterp/optimizeopt/intutils.py
--- a/rpython/jit/metainterp/optimizeopt/intutils.py
+++ b/rpython/jit/metainterp/optimizeopt/intutils.py
@@ -12,6 +12,19 @@
 MAXINT = maxint
 MININT = -maxint - 1
 
+IS_64_BIT = sys.maxint > 2**32
+
+def next_pow2_m1(n):
+    """Calculate next power of 2 greater than n minus one."""
+    n |= n >> 1
+    n |= n >> 2
+    n |= n >> 4
+    n |= n >> 8
+    n |= n >> 16
+    if IS_64_BIT:
+        n |= n >> 32
+    return n
+
 
 class IntBound(AbstractInfo):
     _attrs_ = ('has_upper', 'has_lower', 'upper', 'lower')
@@ -92,6 +105,9 @@
     def known_ge(self, other):
         return other.known_le(self)
 
+    def known_nonnegative(self):
+        return self.has_lower and 0 <= self.lower
+
     def intersect(self, other):
         r = False
 
@@ -192,10 +208,22 @@
         else:
             return IntUnbounded()
 
+    def mod_bound(self, other):
+        r = IntUnbounded()
+        if other.is_constant():
+            val = other.getint()
+            if val >= 0:        # with Python's modulo:  0 <= (x % pos) < pos
+                r.make_ge(IntBound(0, 0))
+                r.make_lt(IntBound(val, val))
+            else:               # with Python's modulo:  neg < (x % neg) <= 0
+                r.make_gt(IntBound(val, val))
+                r.make_le(IntBound(0, 0))
+        return r
+
     def lshift_bound(self, other):
         if self.has_upper and self.has_lower and \
            other.has_upper and other.has_lower and \
-           other.known_ge(IntBound(0, 0)) and \
+           other.known_nonnegative() and \
            other.known_lt(IntBound(LONG_BIT, LONG_BIT)):
             try:
                 vals = (ovfcheck(self.upper << other.upper),
@@ -211,7 +239,7 @@
     def rshift_bound(self, other):
         if self.has_upper and self.has_lower and \
            other.has_upper and other.has_lower and \
-           other.known_ge(IntBound(0, 0)) and \
+           other.known_nonnegative() and \
            other.known_lt(IntBound(LONG_BIT, LONG_BIT)):
             vals = (self.upper >> other.upper,
                     self.upper >> other.lower,
@@ -221,7 +249,32 @@
         else:
             return IntUnbounded()
 
+    def and_bound(self, other):
+        pos1 = self.known_nonnegative()
+        pos2 = other.known_nonnegative()
+        r = IntUnbounded()
+        if pos1 or pos2:
+            r.make_ge(IntBound(0, 0))
+        if pos1:
+            r.make_le(self)
+        if pos2:
+            r.make_le(other)
+        return r
+
+    def or_bound(self, other):
+        r = IntUnbounded()
+        if self.known_nonnegative() and \
+                other.known_nonnegative():
+            if self.has_upper and other.has_upper:
+                mostsignificant = self.upper | other.upper
+                r.intersect(IntBound(0, next_pow2_m1(mostsignificant)))
+            else:
+                r.make_ge(IntBound(0, 0))
+        return r
+
     def contains(self, val):
+        if not we_are_translated():
+            assert not isinstance(val, long)
         if not isinstance(val, int):
             if ((not self.has_lower or self.lower == MININT) and
                 not self.has_upper or self.upper == MAXINT):
@@ -282,7 +335,7 @@
             guards.append(op)
 
     def is_bool(self):
-        return (self.bounded() and self.known_ge(ConstIntBound(0)) and
+        return (self.bounded() and self.known_nonnegative() and
                 self.known_le(ConstIntBound(1)))
 
     def make_bool(self):
@@ -297,7 +350,7 @@
         if self.known_gt(IntBound(0, 0)) or \
            self.known_lt(IntBound(0, 0)):
             return INFO_NONNULL
-        if self.known_ge(IntBound(0, 0)) and \
+        if self.known_nonnegative() and \
            self.known_le(IntBound(0, 0)):
             return INFO_NULL
         return INFO_UNKNOWN
diff --git a/rpython/jit/metainterp/optimizeopt/optimizer.py 
b/rpython/jit/metainterp/optimizeopt/optimizer.py
--- a/rpython/jit/metainterp/optimizeopt/optimizer.py
+++ b/rpython/jit/metainterp/optimizeopt/optimizer.py
@@ -273,7 +273,6 @@
         self.jitdriver_sd = jitdriver_sd
         self.cpu = metainterp_sd.cpu
         self.interned_refs = self.cpu.ts.new_ref_dict()
-        self.interned_ints = {}
         self.resumedata_memo = resume.ResumeDataLoopMemo(metainterp_sd)
         self.pendingfields = None # set temporarily to a list, normally by
                                   # heap.py, as we're about to generate a guard
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_intbound.py 
b/rpython/jit/metainterp/optimizeopt/test/test_intbound.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_intbound.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_intbound.py
@@ -1,12 +1,34 @@
 from rpython.jit.metainterp.optimizeopt.intutils import IntBound, 
IntUpperBound, \
-     IntLowerBound, IntUnbounded
-from rpython.jit.metainterp.optimizeopt.intbounds import next_pow2_m1
+     IntLowerBound, IntUnbounded, next_pow2_m1
 
 from copy import copy
 import sys
-from rpython.rlib.rarithmetic import LONG_BIT
+from rpython.rlib.rarithmetic import LONG_BIT, ovfcheck
 
-def bound(a,b):
+from hypothesis import given, strategies
+
+special_values = (
+    range(-100, 100) +
+    [2 ** i for i in range(1, LONG_BIT)] +
+    [-2 ** i for i in range(1, LONG_BIT)] +
+    [2 ** i - 1 for i in range(1, LONG_BIT)] +
+    [-2 ** i - 1 for i in range(1, LONG_BIT)] +
+    [2 ** i + 1 for i in range(1, LONG_BIT)] +
+    [-2 ** i + 1 for i in range(1, LONG_BIT)] +
+    [sys.maxint, -sys.maxint-1])
+
+special_values = strategies.sampled_from(
+    [int(v) for v in special_values if type(int(v)) is int])
+
+ints = strategies.builds(
+    int, # strategies.integers sometimes returns a long?
+    special_values | strategies.integers(
+    min_value=int(-sys.maxint-1), max_value=sys.maxint))
+
+ints_or_none = strategies.none() | ints
+
+
+def bound(a, b):
     if a is None and b is None:
         return IntUnbounded()
     elif a is None:
@@ -14,11 +36,55 @@
     elif b is None:
         return IntLowerBound(a)
     else:
-        return IntBound(a,b)
+        return IntBound(a, b)
 
 def const(a):
     return bound(a,a)
 
+
+def build_bound_with_contained_number(a, b, c):
+    a, b, c = sorted([a, b, c])
+    r = bound(a, c)
+    assert r.contains(b)
+    return r, b
+
+bound_with_contained_number = strategies.builds(
+    build_bound_with_contained_number,
+    ints_or_none,
+    ints_or_none,
+    ints
+)
+
+unbounded = strategies.builds(
+    lambda x: (bound(None, None), int(x)),
+    ints
+)
+
+lower_bounded = strategies.builds(
+    lambda x, y: (bound(min(x, y), None), max(x, y)),
+    ints,
+    ints
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit

[pypy-commit] pypy fix-vmprof-stacklet-switch-2: merge default

Reply via email to