Author: Antonio Cuni <anto.c...@gmail.com> Branch: fix-vmprof-stacklet-switch-2 Changeset: r93446:92e4ca3c2daa Date: 2017-12-16 12:02 +0100 http://bitbucket.org/pypy/pypy/changeset/92e4ca3c2daa/
Log: merge default diff too long, truncating to 2000 out of 2875 lines diff --git a/pypy/module/test_lib_pypy/test_json_extra.py b/extra_tests/test_json.py rename from pypy/module/test_lib_pypy/test_json_extra.py rename to extra_tests/test_json.py --- a/pypy/module/test_lib_pypy/test_json_extra.py +++ b/extra_tests/test_json.py @@ -1,4 +1,6 @@ -import py, json +import pytest +import json +from hypothesis import given, strategies def is_(x, y): return type(x) is type(y) and x == y @@ -6,12 +8,26 @@ def test_no_ensure_ascii(): assert is_(json.dumps(u"\u1234", ensure_ascii=False), u'"\u1234"') assert is_(json.dumps("\xc0", ensure_ascii=False), '"\xc0"') - e = py.test.raises(UnicodeDecodeError, json.dumps, - (u"\u1234", "\xc0"), ensure_ascii=False) - assert str(e.value).startswith("'ascii' codec can't decode byte 0xc0 ") - e = py.test.raises(UnicodeDecodeError, json.dumps, - ("\xc0", u"\u1234"), ensure_ascii=False) - assert str(e.value).startswith("'ascii' codec can't decode byte 0xc0 ") + with pytest.raises(UnicodeDecodeError) as excinfo: + json.dumps((u"\u1234", "\xc0"), ensure_ascii=False) + assert str(excinfo.value).startswith( + "'ascii' codec can't decode byte 0xc0 ") + with pytest.raises(UnicodeDecodeError) as excinfo: + json.dumps(("\xc0", u"\u1234"), ensure_ascii=False) + assert str(excinfo.value).startswith( + "'ascii' codec can't decode byte 0xc0 ") def test_issue2191(): assert is_(json.dumps(u"xxx", ensure_ascii=False), u'"xxx"') + +jsondata = strategies.recursive( + strategies.none() | + strategies.booleans() | + strategies.floats(allow_nan=False) | + strategies.text(), + lambda children: strategies.lists(children) | + strategies.dictionaries(strategies.text(), children)) + +@given(jsondata) +def test_roundtrip(d): + assert json.loads(json.dumps(d)) == d diff --git a/extra_tests/test_textio.py b/extra_tests/test_textio.py --- a/extra_tests/test_textio.py +++ b/extra_tests/test_textio.py @@ -1,28 +1,48 @@ from hypothesis import given, strategies as st from 
io import BytesIO, TextIOWrapper +import os -LINESEP = ['', '\r', '\n', '\r\n'] +def translate_newlines(text): + text = text.replace('\r\n', '\n') + text = text.replace('\r', '\n') + return text.replace('\n', os.linesep) @st.composite -def text_with_newlines(draw): - sep = draw(st.sampled_from(LINESEP)) - lines = draw(st.lists(st.text(max_size=10), max_size=10)) - return sep.join(lines) +def st_readline_universal( + draw, st_nlines=st.integers(min_value=0, max_value=10)): + n_lines = draw(st_nlines) + lines = draw(st.lists( + st.text(st.characters(blacklist_characters='\r\n')), + min_size=n_lines, max_size=n_lines)) + limits = [] + for line in lines: + limit = draw(st.integers(min_value=0, max_value=len(line) + 5)) + limits.append(limit) + limits.append(-1) + endings = draw(st.lists( + st.sampled_from(['\n', '\r', '\r\n']), + min_size=n_lines, max_size=n_lines)) + return ( + ''.join(line + ending for line, ending in zip(lines, endings)), + limits) -@given(txt=text_with_newlines(), - mode=st.sampled_from(['\r', '\n', '\r\n', '']), - limit=st.integers(min_value=-1)) -def test_readline(txt, mode, limit): +@given(data=st_readline_universal(), + mode=st.sampled_from(['\r', '\n', '\r\n', '', None])) +def test_readline(data, mode): + txt, limits = data textio = TextIOWrapper( - BytesIO(txt.encode('utf-8')), encoding='utf-8', newline=mode) + BytesIO(txt.encode('utf-8', 'surrogatepass')), + encoding='utf-8', errors='surrogatepass', newline=mode) lines = [] - while True: + for limit in limits: line = textio.readline(limit) - if limit > 0: - assert len(line) < limit + if limit >= 0: + assert len(line) <= limit if line: lines.append(line) - else: + elif limit: break - assert u''.join(lines) == txt + if mode is None: + txt = translate_newlines(txt) + assert txt.startswith(u''.join(lines)) diff --git a/lib_pypy/resource.py b/lib_pypy/resource.py --- a/lib_pypy/resource.py +++ b/lib_pypy/resource.py @@ -20,6 +20,7 @@ or via the attributes ru_utime, ru_stime, ru_maxrss, and so 
on.""" __metaclass__ = _structseq.structseqtype + name = "resource.struct_rusage" ru_utime = _structseq.structseqfield(0, "user time used") ru_stime = _structseq.structseqfield(1, "system time used") diff --git a/pypy/doc/build.rst b/pypy/doc/build.rst --- a/pypy/doc/build.rst +++ b/pypy/doc/build.rst @@ -149,7 +149,7 @@ xz-devel # For lzma on PyPy3. (XXX plus the SLES11 version of libgdbm-dev and tk-dev) -On Mac OS X:: +On Mac OS X: Most of these build-time dependencies are installed alongside the Developer Tools. However, note that in order for the installation to diff --git a/pypy/doc/cpython_differences.rst b/pypy/doc/cpython_differences.rst --- a/pypy/doc/cpython_differences.rst +++ b/pypy/doc/cpython_differences.rst @@ -355,7 +355,11 @@ containers (as list items or in sets for example), the exact rule of equality used is "``if x is y or x == y``" (on both CPython and PyPy); as a consequence, because all ``nans`` are identical in PyPy, you -cannot have several of them in a set, unlike in CPython. (Issue `#1974`__) +cannot have several of them in a set, unlike in CPython. (Issue `#1974`__). +Another consequence is that ``cmp(float('nan'), float('nan')) == 0``, because +``cmp`` checks with ``is`` first whether the arguments are identical (there is +no good value to return from this call to ``cmp``, because ``cmp`` pretends +that there is a total order on floats, but that is wrong for NaNs). .. 
__: https://bitbucket.org/pypy/pypy/issue/1974/different-behaviour-for-collections-of diff --git a/pypy/doc/how-to-release.rst b/pypy/doc/how-to-release.rst --- a/pypy/doc/how-to-release.rst +++ b/pypy/doc/how-to-release.rst @@ -62,7 +62,7 @@ * go to pypy/tool/release and run ``force-builds.py <release branch>`` The following JIT binaries should be built, however, we need more buildbots - windows, linux-32, linux-64, osx64, armhf-raring, armhf-raspberrian, armel, + windows, linux-32, linux-64, osx64, armhf-raspberrian, armel, freebsd64 * wait for builds to complete, make sure there are no failures diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -1,30 +1,42 @@ -=========================== -What's new in PyPy2.7 5.10+ -=========================== - -.. this is a revision shortly after release-pypy2.7-v5.9.0 -.. startrev:d56dadcef996 - -.. branch: cppyy-packaging -Cleanup and improve cppyy packaging - -.. branch: docs-osx-brew-openssl - -.. branch: keep-debug-symbols -Add a smartstrip tool, which can optionally keep the debug symbols in a -separate file, instead of just stripping them away. Use it in packaging - -.. branch: bsd-patches -Fix failures on FreeBSD, contributed by David Naylor as patches on the issue -tracker (issues 2694, 2695, 2696, 2697) - -.. branch: run-extra-tests -Run extra_tests/ in buildbot - -.. branch: vmprof-0.4.10 -Upgrade the _vmprof backend to vmprof 0.4.10 - -.. branch: fix-vmprof-stacklet-switch -.. branch: fix-vmprof-stacklet-switch-2 -Fix vmprof+ continulet (i.e. greenelts, eventlet, gevent, ...) - +=========================== +What's new in PyPy2.7 5.10+ +=========================== + +.. this is a revision shortly after release-pypy2.7-v5.9.0 +.. startrev:d56dadcef996 + + +.. branch: cppyy-packaging + +Cleanup and improve cppyy packaging + +.. branch: docs-osx-brew-openssl + +.. 
branch: keep-debug-symbols + +Add a smartstrip tool, which can optionally keep the debug symbols in a +separate file, instead of just stripping them away. Use it in packaging + +.. branch: bsd-patches + +Fix failures on FreeBSD, contributed by David Naylor as patches on the issue +tracker (issues 2694, 2695, 2696, 2697) + +.. branch: run-extra-tests + +Run extra_tests/ in buildbot + +.. branch: vmprof-0.4.10 + +Upgrade the _vmprof backend to vmprof 0.4.10 + +.. branch: fix-vmprof-stacklet-switch +.. branch: fix-vmprof-stacklet-switch-2 +Fix a vmprof+continulets (i.e. greenelts, eventlet, gevent, ...) + +.. branch: win32-vcvars + +.. branch: rdict-fast-hash + +Make it possible to declare that the hash function of an r_dict is fast in RPython. + diff --git a/pypy/doc/whatsnew-pypy2-5.6.0.rst b/pypy/doc/whatsnew-pypy2-5.6.0.rst --- a/pypy/doc/whatsnew-pypy2-5.6.0.rst +++ b/pypy/doc/whatsnew-pypy2-5.6.0.rst @@ -101,7 +101,7 @@ .. branch: newinitwarn -Match CPython's stricter handling of __new/init__ arguments +Match CPython's stricter handling of ``__new__``/``__init__`` arguments .. branch: openssl-1.1 diff --git a/pypy/doc/windows.rst b/pypy/doc/windows.rst --- a/pypy/doc/windows.rst +++ b/pypy/doc/windows.rst @@ -11,7 +11,7 @@ To build pypy-c you need a working python environment, and a C compiler. It is possible to translate with a CPython 2.6 or later, but this is not -the preferred way, because it will take a lot longer to run � depending +the preferred way, because it will take a lot longer to run – depending on your architecture, between two and three times as long. So head to `our downloads`_ and get the latest stable version. 
@@ -103,6 +103,7 @@ must also copy the ``vcvarsall.bat`` file fron the ``...\9.0`` directory to the ``...\9.0\VC`` directory, and edit it, changing the lines that set ``VCINSTALLDIR`` and ``WindowsSdkDir``:: + set VCINSTALLDIR=%~dp0\ set WindowsSdkDir=%~dp0\..\WinSDK\ diff --git a/pypy/interpreter/astcompiler/test/test_astbuilder.py b/pypy/interpreter/astcompiler/test/test_astbuilder.py --- a/pypy/interpreter/astcompiler/test/test_astbuilder.py +++ b/pypy/interpreter/astcompiler/test/test_astbuilder.py @@ -1246,3 +1246,7 @@ exc = py.test.raises(SyntaxError, self.get_ast, input).value assert exc.msg == ("(unicode error) 'unicodeescape' codec can't decode" " bytes in position 0-1: truncated \\xXX escape") + input = "u'\\x1'" + exc = py.test.raises(SyntaxError, self.get_ast, input).value + assert exc.msg == ("(unicode error) 'unicodeescape' codec can't decode" + " bytes in position 0-2: truncated \\xXX escape") diff --git a/pypy/interpreter/test/test_unicodehelper.py b/pypy/interpreter/test/test_unicodehelper.py --- a/pypy/interpreter/test/test_unicodehelper.py +++ b/pypy/interpreter/test/test_unicodehelper.py @@ -1,4 +1,7 @@ -from pypy.interpreter.unicodehelper import encode_utf8, decode_utf8 +import pytest +import struct +from pypy.interpreter.unicodehelper import ( + encode_utf8, decode_utf8, unicode_encode_utf_32_be) class FakeSpace: pass @@ -24,3 +27,23 @@ assert map(ord, got) == [0xd800, 0xdc00] got = decode_utf8(space, "\xf0\x90\x80\x80") assert map(ord, got) == [0x10000] + +@pytest.mark.parametrize('unich', [u"\ud800", u"\udc80"]) +def test_utf32_surrogates(unich): + assert (unicode_encode_utf_32_be(unich, 1, None) == + struct.pack('>i', ord(unich))) + with pytest.raises(UnicodeEncodeError): + unicode_encode_utf_32_be(unich, 1, None, allow_surrogates=False) + + def replace_with(ru, rs): + def errorhandler(errors, enc, msg, u, startingpos, endingpos): + if errors == 'strict': + raise UnicodeEncodeError(enc, u, startingpos, endingpos, msg) + return ru, rs, 
endingpos + return unicode_encode_utf_32_be( + u"<%s>" % unich, 3, None, + errorhandler, allow_surrogates=False) + + assert replace_with(u'rep', None) == u'<rep>'.encode('utf-32-be') + assert (replace_with(None, '\xca\xfe\xca\xfe') == + '\x00\x00\x00<\xca\xfe\xca\xfe\x00\x00\x00>') diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -1,7 +1,11 @@ +from rpython.rlib.objectmodel import specialize +from rpython.rlib.rarithmetic import intmask +from rpython.rlib.rstring import StringBuilder, UnicodeBuilder +from rpython.rlib import runicode +from rpython.rlib.runicode import ( + default_unicode_error_encode, default_unicode_error_decode, + MAXUNICODE, BYTEORDER, BYTEORDER2, UNICHR) from pypy.interpreter.error import OperationError -from rpython.rlib.objectmodel import specialize -from rpython.rlib import runicode -from pypy.module._codecs import interp_codecs @specialize.memo() def decode_error_handler(space): @@ -37,6 +41,7 @@ # These functions take and return unwrapped rpython strings and unicodes def decode_unicode_escape(space, string): + from pypy.module._codecs import interp_codecs state = space.fromcache(interp_codecs.CodecState) unicodedata_handler = state.get_unicodedata_handler(space) result, consumed = runicode.str_decode_unicode_escape( @@ -71,3 +76,229 @@ uni, len(uni), "strict", errorhandler=None, allow_surrogates=True) + +# ____________________________________________________________ +# utf-32 + +def str_decode_utf_32(s, size, errors, final=True, + errorhandler=None): + result, length, byteorder = str_decode_utf_32_helper( + s, size, errors, final, errorhandler, "native") + return result, length + +def str_decode_utf_32_be(s, size, errors, final=True, + errorhandler=None): + result, length, byteorder = str_decode_utf_32_helper( + s, size, errors, final, errorhandler, "big") + return result, length + +def str_decode_utf_32_le(s, size, errors, 
final=True, + errorhandler=None): + result, length, byteorder = str_decode_utf_32_helper( + s, size, errors, final, errorhandler, "little") + return result, length + +def py3k_str_decode_utf_32(s, size, errors, final=True, + errorhandler=None): + result, length, byteorder = str_decode_utf_32_helper( + s, size, errors, final, errorhandler, "native", 'utf-32-' + BYTEORDER2) + return result, length + +def py3k_str_decode_utf_32_be(s, size, errors, final=True, + errorhandler=None): + result, length, byteorder = str_decode_utf_32_helper( + s, size, errors, final, errorhandler, "big", 'utf-32-be') + return result, length + +def py3k_str_decode_utf_32_le(s, size, errors, final=True, + errorhandler=None): + result, length, byteorder = str_decode_utf_32_helper( + s, size, errors, final, errorhandler, "little", 'utf-32-le') + return result, length + +BOM32_DIRECT = intmask(0x0000FEFF) +BOM32_REVERSE = intmask(0xFFFE0000) + +def str_decode_utf_32_helper(s, size, errors, final=True, + errorhandler=None, + byteorder="native", + public_encoding_name='utf32'): + if errorhandler is None: + errorhandler = default_unicode_error_decode + bo = 0 + + if BYTEORDER == 'little': + iorder = [0, 1, 2, 3] + else: + iorder = [3, 2, 1, 0] + + # Check for BOM marks (U+FEFF) in the input and adjust current + # byte order setting accordingly. In native mode, the leading BOM + # mark is skipped, in all other modes, it is copied to the output + # stream as-is (giving a ZWNBSP character). 
+ pos = 0 + if byteorder == 'native': + if size >= 4: + bom = intmask( + (ord(s[iorder[3]]) << 24) | (ord(s[iorder[2]]) << 16) | + (ord(s[iorder[1]]) << 8) | ord(s[iorder[0]])) + if BYTEORDER == 'little': + if bom == BOM32_DIRECT: + pos += 4 + bo = -1 + elif bom == BOM32_REVERSE: + pos += 4 + bo = 1 + else: + if bom == BOM32_DIRECT: + pos += 4 + bo = 1 + elif bom == BOM32_REVERSE: + pos += 4 + bo = -1 + elif byteorder == 'little': + bo = -1 + else: + bo = 1 + if size == 0: + return u'', 0, bo + if bo == -1: + # force little endian + iorder = [0, 1, 2, 3] + elif bo == 1: + # force big endian + iorder = [3, 2, 1, 0] + + result = UnicodeBuilder(size // 4) + + while pos < size: + # remaining bytes at the end? (size should be divisible by 4) + if len(s) - pos < 4: + if not final: + break + r, pos = errorhandler(errors, public_encoding_name, + "truncated data", + s, pos, len(s)) + result.append(r) + if len(s) - pos < 4: + break + continue + ch = ((ord(s[pos + iorder[3]]) << 24) | (ord(s[pos + iorder[2]]) << 16) | + (ord(s[pos + iorder[1]]) << 8) | ord(s[pos + iorder[0]])) + if ch >= 0x110000: + r, pos = errorhandler(errors, public_encoding_name, + "codepoint not in range(0x110000)", + s, pos, len(s)) + result.append(r) + continue + + if MAXUNICODE < 65536 and ch >= 0x10000: + ch -= 0x10000L + result.append(unichr(0xD800 + (ch >> 10))) + result.append(unichr(0xDC00 + (ch & 0x03FF))) + else: + result.append(UNICHR(ch)) + pos += 4 + return result.build(), pos, bo + +def _STORECHAR32(result, CH, byteorder): + c0 = chr(((CH) >> 24) & 0xff) + c1 = chr(((CH) >> 16) & 0xff) + c2 = chr(((CH) >> 8) & 0xff) + c3 = chr((CH) & 0xff) + if byteorder == 'little': + result.append(c3) + result.append(c2) + result.append(c1) + result.append(c0) + else: + result.append(c0) + result.append(c1) + result.append(c2) + result.append(c3) + +def unicode_encode_utf_32_helper(s, size, errors, + errorhandler=None, + allow_surrogates=True, + byteorder='little', + public_encoding_name='utf32'): + if 
errorhandler is None: + errorhandler = default_unicode_error_encode + if size == 0: + if byteorder == 'native': + result = StringBuilder(4) + _STORECHAR32(result, 0xFEFF, BYTEORDER) + return result.build() + return "" + + result = StringBuilder(size * 4 + 4) + if byteorder == 'native': + _STORECHAR32(result, 0xFEFF, BYTEORDER) + byteorder = BYTEORDER + + pos = 0 + while pos < size: + ch = ord(s[pos]) + pos += 1 + ch2 = 0 + if not allow_surrogates and 0xD800 <= ch < 0xE000: + ru, rs, pos = errorhandler( + errors, public_encoding_name, 'surrogates not allowed', + s, pos - 1, pos) + if rs is not None: + # py3k only + if len(rs) % 4 != 0: + errorhandler( + 'strict', public_encoding_name, 'surrogates not allowed', + s, pos - 1, pos) + result.append(rs) + continue + for ch in ru: + if ord(ch) < 0xD800: + _STORECHAR32(result, ord(ch), byteorder) + else: + errorhandler( + 'strict', public_encoding_name, + 'surrogates not allowed', s, pos - 1, pos) + continue + if 0xD800 <= ch < 0xDC00 and MAXUNICODE < 65536 and pos < size: + ch2 = ord(s[pos]) + if 0xDC00 <= ch2 < 0xE000: + ch = (((ch & 0x3FF) << 10) | (ch2 & 0x3FF)) + 0x10000 + pos += 1 + _STORECHAR32(result, ch, byteorder) + + return result.build() + +def unicode_encode_utf_32(s, size, errors, + errorhandler=None, allow_surrogates=True): + return unicode_encode_utf_32_helper(s, size, errors, errorhandler, + allow_surrogates, "native") + +def unicode_encode_utf_32_be(s, size, errors, + errorhandler=None, allow_surrogates=True): + return unicode_encode_utf_32_helper(s, size, errors, errorhandler, + allow_surrogates, "big") + +def unicode_encode_utf_32_le(s, size, errors, + errorhandler=None, allow_surrogates=True): + return unicode_encode_utf_32_helper(s, size, errors, errorhandler, + allow_surrogates, "little") + +def py3k_unicode_encode_utf_32(s, size, errors, + errorhandler=None, allow_surrogates=True): + return unicode_encode_utf_32_helper(s, size, errors, errorhandler, + allow_surrogates, "native", + 'utf-32-' + 
BYTEORDER2) + +def py3k_unicode_encode_utf_32_be(s, size, errors, + errorhandler=None, allow_surrogates=True): + return unicode_encode_utf_32_helper(s, size, errors, errorhandler, + allow_surrogates, "big", + 'utf-32-be') + +def py3k_unicode_encode_utf_32_le(s, size, errors, + errorhandler=None, allow_surrogates=True): + return unicode_encode_utf_32_helper(s, size, errors, errorhandler, + allow_surrogates, "little", + 'utf-32-le') diff --git a/pypy/module/__builtin__/test/test_builtin.py b/pypy/module/__builtin__/test/test_builtin.py --- a/pypy/module/__builtin__/test/test_builtin.py +++ b/pypy/module/__builtin__/test/test_builtin.py @@ -404,6 +404,7 @@ def test_cmp(self): + assert cmp(float('nan'), float('nan')) == 0 assert cmp(9,9) == 0 assert cmp(0,9) < 0 assert cmp(9,0) > 0 diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py --- a/pypy/module/_codecs/interp_codecs.py +++ b/pypy/module/_codecs/interp_codecs.py @@ -1,10 +1,12 @@ from rpython.rlib import jit from rpython.rlib.objectmodel import we_are_translated, not_rpython from rpython.rlib.rstring import UnicodeBuilder +from rpython.rlib import runicode from rpython.rlib.runicode import code_to_unichr, MAXUNICODE from pypy.interpreter.error import OperationError, oefmt from pypy.interpreter.gateway import interp2app, unwrap_spec, WrappedDefault +from pypy.interpreter import unicodehelper class VersionTag(object): @@ -210,7 +212,8 @@ def xmlcharrefreplace_errors(space, w_exc): check_exception(space, w_exc) if space.isinstance_w(w_exc, space.w_UnicodeEncodeError): - obj = space.realunicode_w(space.getattr(w_exc, space.newtext('object'))) + w_obj = space.getattr(w_exc, space.newtext('object')) + obj = space.realunicode_w(w_obj) start = space.int_w(space.getattr(w_exc, space.newtext('start'))) w_end = space.getattr(w_exc, space.newtext('end')) end = space.int_w(w_end) @@ -236,7 +239,8 @@ def backslashreplace_errors(space, w_exc): check_exception(space, w_exc) if 
space.isinstance_w(w_exc, space.w_UnicodeEncodeError): - obj = space.realunicode_w(space.getattr(w_exc, space.newtext('object'))) + w_obj = space.getattr(w_exc, space.newtext('object')) + obj = space.realunicode_w(w_obj) start = space.int_w(space.getattr(w_exc, space.newtext('start'))) w_end = space.getattr(w_exc, space.newtext('end')) end = space.int_w(w_end) @@ -363,19 +367,23 @@ raise oefmt(space.w_TypeError, "handler must be callable") # ____________________________________________________________ -# delegation to runicode +# delegation to runicode/unicodehelper -from rpython.rlib import runicode +def _find_implementation(impl_name): + try: + func = getattr(unicodehelper, impl_name) + except AttributeError: + func = getattr(runicode, impl_name) + return func def make_encoder_wrapper(name): rname = "unicode_encode_%s" % (name.replace("_encode", ""), ) - assert hasattr(runicode, rname) + func = _find_implementation(rname) @unwrap_spec(uni=unicode, errors='text_or_none') def wrap_encoder(space, uni, errors="strict"): if errors is None: errors = 'strict' state = space.fromcache(CodecState) - func = getattr(runicode, rname) result = func(uni, len(uni), errors, state.encode_error_handler) return space.newtuple([space.newbytes(result), space.newint(len(uni))]) wrap_encoder.func_name = rname @@ -383,7 +391,7 @@ def make_decoder_wrapper(name): rname = "str_decode_%s" % (name.replace("_decode", ""), ) - assert hasattr(runicode, rname) + func = _find_implementation(rname) @unwrap_spec(string='bufferstr', errors='text_or_none', w_final=WrappedDefault(False)) def wrap_decoder(space, string, errors="strict", w_final=None): @@ -391,7 +399,6 @@ errors = 'strict' final = space.is_true(w_final) state = space.fromcache(CodecState) - func = getattr(runicode, rname) result, consumed = func(string, len(string), errors, final, state.decode_error_handler) return space.newtuple([space.newunicode(result), space.newint(consumed)]) diff --git a/pypy/module/_codecs/test/test_codecs.py 
b/pypy/module/_codecs/test/test_codecs.py --- a/pypy/module/_codecs/test/test_codecs.py +++ b/pypy/module/_codecs/test/test_codecs.py @@ -115,10 +115,10 @@ raises(TypeError, charmap_decode, '\xff', "strict", {0xff: 0x110000}) assert (charmap_decode("\x00\x01\x02", "strict", {0: 0x10FFFF, 1: ord('b'), 2: ord('c')}) == - u"\U0010FFFFbc", 3) + (u"\U0010FFFFbc", 3)) assert (charmap_decode("\x00\x01\x02", "strict", {0: u'\U0010FFFF', 1: u'b', 2: u'c'}) == - u"\U0010FFFFbc", 3) + (u"\U0010FFFFbc", 3)) def test_escape_decode_errors(self): from _codecs import escape_decode as decode @@ -537,8 +537,12 @@ assert '\xff'.decode('utf-7', 'ignore') == '' assert '\x00'.decode('unicode-internal', 'ignore') == '' - def test_backslahreplace(self): - assert u'a\xac\u1234\u20ac\u8000'.encode('ascii', 'backslashreplace') == 'a\\xac\u1234\u20ac\u8000' + def test_backslashreplace(self): + sin = u"a\xac\u1234\u20ac\u8000\U0010ffff" + expected = "a\\xac\\u1234\\u20ac\\u8000\\U0010ffff" + assert sin.encode('ascii', 'backslashreplace') == expected + expected = "a\xac\\u1234\xa4\\u8000\\U0010ffff" + assert sin.encode("iso-8859-15", "backslashreplace") == expected def test_badhandler(self): import codecs @@ -592,11 +596,11 @@ def handler_unicodeinternal(exc): if not isinstance(exc, UnicodeDecodeError): raise TypeError("don't know how to handle %r" % exc) - return (u"\x01", 1) + return (u"\x01", 4) codecs.register_error("test.hui", handler_unicodeinternal) res = "\x00\x00\x00\x00\x00".decode("unicode-internal", "test.hui") if sys.maxunicode > 65535: - assert res == u"\u0000\u0001\u0000" # UCS4 build + assert res == u"\u0000\u0001" # UCS4 build else: assert res == u"\x00\x00\x01\x00\x00" # UCS2 build @@ -750,3 +754,31 @@ assert _codecs.unicode_escape_decode(b) == (u'', 0) assert _codecs.raw_unicode_escape_decode(b) == (u'', 0) assert _codecs.unicode_internal_decode(b) == (u'', 0) + + def test_xmlcharrefreplace(self): + r = u'\u1234\u0080\u2345\u0079\u00AB'.encode('latin1', 'xmlcharrefreplace') + 
assert r == '&#4660;\x80&#9029;y\xab' + r = u'\u1234\u0080\u2345\u0079\u00AB'.encode('ascii', 'xmlcharrefreplace') + assert r == '&#4660;&#128;&#9029;y&#171;' + + def test_errorhandler_collection(self): + import _codecs + errors = [] + def record_error(exc): + if not isinstance(exc, UnicodeEncodeError): + raise TypeError("don't know how to handle %r" % exc) + errors.append(exc.object[exc.start:exc.end]) + return (u'', exc.end) + _codecs.register_error("test.record", record_error) + + sin = u"\xac\u1234\u1234\u20ac\u8000" + assert sin.encode("ascii", "test.record") == "" + assert errors == [sin] + + errors = [] + assert sin.encode("latin-1", "test.record") == "\xac" + assert errors == [u'\u1234\u1234\u20ac\u8000'] + + errors = [] + assert sin.encode("iso-8859-15", "test.record") == "\xac\xa4" + assert errors == [u'\u1234\u1234', u'\u8000'] diff --git a/pypy/module/test_lib_pypy/test_greenlet.py b/pypy/module/_continuation/test/test_greenlet.py rename from pypy/module/test_lib_pypy/test_greenlet.py rename to pypy/module/_continuation/test/test_greenlet.py diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py --- a/pypy/module/_io/interp_textio.py +++ b/pypy/module/_io/interp_textio.py @@ -353,6 +353,7 @@ while scanned < limit: try: ch = self.next_char() + scanned += 1 except StopIteration: return False if ch == u'\n': @@ -413,6 +414,7 @@ if not space.isinstance_w(w_decoded, space.w_unicode): msg = "decoder should return a string result, not '%T'" raise oefmt(space.w_TypeError, msg, w_decoded) + return w_decoded class W_TextIOWrapper(W_TextIOBase): @@ -737,7 +739,7 @@ remnant = None continue - if limit > 0: + if limit >= 0: remaining = limit - builder.getlength() assert remaining >= 0 else: @@ -939,12 +941,13 @@ w_decoded = space.call_method(self.w_decoder, "decode", w_chunk, space.newbool(bool(cookie.need_eof))) - self.decoded.set(space, w_decoded) + w_decoded = check_decoded(space, w_decoded) # Skip chars_to_skip of the decoded characters - if len(self.decoded.text) < 
cookie.chars_to_skip: + if space.len_w(w_decoded) < cookie.chars_to_skip: raise oefmt(space.w_IOError, "can't restore logical file position") + self.decoded.set(space, w_decoded) self.decoded.pos = cookie.chars_to_skip else: self.snapshot = PositionSnapshot(cookie.dec_flags, "") @@ -957,10 +960,8 @@ def tell_w(self, space): self._check_closed(space) - if not self.seekable: raise oefmt(space.w_IOError, "underlying stream is not seekable") - if not self.telling: raise oefmt(space.w_IOError, "telling position disabled by next() call") @@ -1030,14 +1031,14 @@ # We didn't get enough decoded data; signal EOF to get more. w_decoded = space.call_method(self.w_decoder, "decode", space.newbytes(""), - space.newint(1)) # final=1 + space.newint(1)) # final=1 check_decoded(space, w_decoded) - chars_decoded += len(space.unicode_w(w_decoded)) + chars_decoded += space.len_w(w_decoded) cookie.need_eof = 1 if chars_decoded < chars_to_skip: raise oefmt(space.w_IOError, - "can't reconstruct logical file position") + "can't reconstruct logical file position") finally: space.call_method(self.w_decoder, "setstate", w_saved_state) diff --git a/pypy/module/_io/test/test_interp_textio.py b/pypy/module/_io/test/test_interp_textio.py --- a/pypy/module/_io/test/test_interp_textio.py +++ b/pypy/module/_io/test/test_interp_textio.py @@ -1,40 +1,54 @@ import pytest try: - from hypothesis import given, strategies as st, assume + from hypothesis import given, strategies as st except ImportError: pytest.skip("hypothesis required") +import os from pypy.module._io.interp_bytesio import W_BytesIO from pypy.module._io.interp_textio import W_TextIOWrapper, DecodeBuffer -LINESEP = ['', '\r', '\n', '\r\n'] +def translate_newlines(text): + text = text.replace(u'\r\n', u'\n') + text = text.replace(u'\r', u'\n') + return text.replace(u'\n', os.linesep) @st.composite -def text_with_newlines(draw): - sep = draw(st.sampled_from(LINESEP)) - lines = draw(st.lists(st.text(max_size=10), max_size=10)) - return 
sep.join(lines) +def st_readline(draw, st_nlines=st.integers(min_value=0, max_value=10)): + n_lines = draw(st_nlines) + fragments = [] + limits = [] + for _ in range(n_lines): + line = draw(st.text(st.characters(blacklist_characters=u'\r\n'))) + fragments.append(line) + ending = draw(st.sampled_from([u'\n', u'\r', u'\r\n'])) + fragments.append(ending) + limit = draw(st.integers(min_value=0, max_value=len(line) + 5)) + limits.append(limit) + limits.append(-1) + return (u''.join(fragments), limits) -@given(txt=text_with_newlines(), - mode=st.sampled_from(['\r', '\n', '\r\n', '']), - limit=st.integers(min_value=-1)) -def test_readline(space, txt, mode, limit): - assume(limit != 0) +@given(data=st_readline(), + mode=st.sampled_from(['\r', '\n', '\r\n', ''])) +def test_readline(space, data, mode): + txt, limits = data w_stream = W_BytesIO(space) w_stream.descr_init(space, space.newbytes(txt.encode('utf-8'))) w_textio = W_TextIOWrapper(space) w_textio.descr_init( - space, w_stream, encoding='utf-8', + space, w_stream, + encoding='utf-8', w_errors=space.newtext('surrogatepass'), w_newline=space.newtext(mode)) lines = [] - while True: - line = space.unicode_w(w_textio.readline_w(space, space.newint(limit))) - if limit > 0: + for limit in limits: + w_line = w_textio.readline_w(space, space.newint(limit)) + line = space.unicode_w(w_line) + if limit >= 0: assert len(line) <= limit if line: lines.append(line) - else: + elif limit: break - assert u''.join(lines) == txt + assert txt.startswith(u''.join(lines)) @given(st.text()) def test_read_buffer(text): diff --git a/pypy/module/_pypyjson/interp_decoder.py b/pypy/module/_pypyjson/interp_decoder.py --- a/pypy/module/_pypyjson/interp_decoder.py +++ b/pypy/module/_pypyjson/interp_decoder.py @@ -71,7 +71,7 @@ self.ll_chars = rffi.str2charp(s) self.end_ptr = lltype.malloc(rffi.CCHARPP.TO, 1, flavor='raw') self.pos = 0 - self.cache = r_dict(slice_eq, slice_hash) + self.cache = r_dict(slice_eq, slice_hash, simple_hash_eq=True) def 
close(self): rffi.free_charp(self.ll_chars) diff --git a/pypy/module/_pypyjson/interp_encoder.py b/pypy/module/_pypyjson/interp_encoder.py --- a/pypy/module/_pypyjson/interp_encoder.py +++ b/pypy/module/_pypyjson/interp_encoder.py @@ -49,24 +49,24 @@ first = 0 for i in range(first, len(u)): - c = u[i] - if c <= u'~': - if c == u'"' or c == u'\\': + c = ord(u[i]) + if c <= ord('~'): + if c == ord('"') or c == ord('\\'): sb.append('\\') - elif c < u' ': - sb.append(ESCAPE_BEFORE_SPACE[ord(c)]) + elif c < ord(' '): + sb.append(ESCAPE_BEFORE_SPACE[c]) continue - sb.append(chr(ord(c))) + sb.append(chr(c)) else: - if c <= u'\uffff': + if c <= ord(u'\uffff'): sb.append('\\u') - sb.append(HEX[ord(c) >> 12]) - sb.append(HEX[(ord(c) >> 8) & 0x0f]) - sb.append(HEX[(ord(c) >> 4) & 0x0f]) - sb.append(HEX[ord(c) & 0x0f]) + sb.append(HEX[c >> 12]) + sb.append(HEX[(c >> 8) & 0x0f]) + sb.append(HEX[(c >> 4) & 0x0f]) + sb.append(HEX[c & 0x0f]) else: # surrogate pair - n = ord(c) - 0x10000 + n = c - 0x10000 s1 = 0xd800 | ((n >> 10) & 0x3ff) sb.append('\\ud') sb.append(HEX[(s1 >> 8) & 0x0f]) diff --git a/pypy/module/_rawffi/alt/type_converter.py b/pypy/module/_rawffi/alt/type_converter.py --- a/pypy/module/_rawffi/alt/type_converter.py +++ b/pypy/module/_rawffi/alt/type_converter.py @@ -128,7 +128,7 @@ intval: lltype.Signed """ self.error(w_ffitype, w_obj) - + def handle_unichar(self, w_ffitype, w_obj, intval): """ intval: lltype.Signed @@ -174,7 +174,7 @@ def handle_struct_rawffi(self, w_ffitype, w_structinstance): """ This method should be killed as soon as we remove support for _rawffi structures - + w_structinstance: W_StructureInstance """ self.error(w_ffitype, w_structinstance) @@ -349,7 +349,7 @@ def get_struct_rawffi(self, w_ffitype, w_structdescr): """ This should be killed as soon as we kill support for _rawffi structures - + Return type: lltype.Unsigned (the address of the structure) """ diff --git a/pypy/module/_sre/interp_sre.py b/pypy/module/_sre/interp_sre.py --- 
a/pypy/module/_sre/interp_sre.py +++ b/pypy/module/_sre/interp_sre.py @@ -481,11 +481,13 @@ @unwrap_spec(w_groupnum=WrappedDefault(0)) def start_w(self, w_groupnum): - return self.space.newint(self.do_span(w_groupnum)[0]) + start, end = self.do_span(w_groupnum) + return self.space.newint(start) @unwrap_spec(w_groupnum=WrappedDefault(0)) def end_w(self, w_groupnum): - return self.space.newint(self.do_span(w_groupnum)[1]) + start, end = self.do_span(w_groupnum) + return self.space.newint(end) @unwrap_spec(w_groupnum=WrappedDefault(0)) def span_w(self, w_groupnum): diff --git a/pypy/module/_sre/test/test_app_sre.py b/pypy/module/_sre/test/test_app_sre.py --- a/pypy/module/_sre/test/test_app_sre.py +++ b/pypy/module/_sre/test/test_app_sre.py @@ -87,6 +87,14 @@ assert [("a", "l"), ("u", "s")] == re.findall("b(.)(.)", "abalbus") assert [("a", ""), ("s", "s")] == re.findall("b(a|(s))", "babs") + def test_findall_unicode(self): + import re + assert [u"\u1234"] == re.findall(u"\u1234", u"\u1000\u1234\u2000") + assert ["a", "u"] == re.findall("b(.)", "abalbus") + assert [("a", "l"), ("u", "s")] == re.findall("b(.)(.)", "abalbus") + assert [("a", ""), ("s", "s")] == re.findall("b(a|(s))", "babs") + assert [u"xyz"] == re.findall(u".*yz", u"xyz") + def test_finditer(self): import re it = re.finditer("b(.)", "brabbel") @@ -999,3 +1007,15 @@ import re assert re.search(".+ab", "wowowowawoabwowo") assert None == re.search(".+ab", "wowowaowowo") + + +class AppTestUnicodeExtra: + def test_string_attribute(self): + import re + match = re.search(u"\u1234", u"\u1233\u1234\u1235") + assert match.string == u"\u1233\u1234\u1235" + + def test_match_start(self): + import re + match = re.search(u"\u1234", u"\u1233\u1234\u1235") + assert match.start() == 1 diff --git a/pypy/module/cpyext/pyerrors.py b/pypy/module/cpyext/pyerrors.py --- a/pypy/module/cpyext/pyerrors.py +++ b/pypy/module/cpyext/pyerrors.py @@ -122,7 +122,7 @@ error indicator.""" raise oefmt(space.w_TypeError, "bad argument type 
for built-in operation") -@cpython_api([], lltype.Void) +@cpython_api([], lltype.Void, error=None) def PyErr_BadInternalCall(space): raise oefmt(space.w_SystemError, "Bad internal call!") diff --git a/pypy/module/cpyext/test/test_codecs.py b/pypy/module/cpyext/test/test_codecs.py --- a/pypy/module/cpyext/test/test_codecs.py +++ b/pypy/module/cpyext/test/test_codecs.py @@ -11,5 +11,5 @@ w_encoded = space.call_method(w_encoder, 'encode', space.wrap(u'späm')) w_decoder = PyCodec_IncrementalDecoder(space, utf8, None) w_decoded = space.call_method(w_decoder, 'decode', w_encoded) - assert space.unwrap(w_decoded) == u'späm' + assert space.unicode_w(w_decoded) == u'späm' rffi.free_charp(utf8) diff --git a/pypy/module/cpyext/test/test_eval.py b/pypy/module/cpyext/test/test_eval.py --- a/pypy/module/cpyext/test/test_eval.py +++ b/pypy/module/cpyext/test/test_eval.py @@ -131,7 +131,7 @@ finally: rffi.free_charp(buf) w_a = space.getitem(w_globals, space.wrap("a")) - assert space.unwrap(w_a) == u'caf\xe9' + assert space.unicode_w(w_a) == u'caf\xe9' lltype.free(flags, flavor='raw') def test_run_file(self, space): diff --git a/pypy/module/cpyext/test/test_object.py b/pypy/module/cpyext/test/test_object.py --- a/pypy/module/cpyext/test/test_object.py +++ b/pypy/module/cpyext/test/test_object.py @@ -8,7 +8,7 @@ from pypy.module.cpyext.object import ( PyObject_IsTrue, PyObject_Not, PyObject_GetAttrString, PyObject_DelAttrString, PyObject_GetAttr, PyObject_DelAttr, - PyObject_GetItem, + PyObject_GetItem, PyObject_IsInstance, PyObject_IsSubclass, PyObject_AsFileDescriptor, PyObject_Hash, PyObject_Cmp, PyObject_Unicode ) @@ -209,9 +209,9 @@ PyObject_Cmp(space, w(u"\xe9"), w("\xe9"), ptr) def test_unicode(self, space, api): - assert space.unwrap(api.PyObject_Unicode(None)) == u"<NULL>" - assert space.unwrap(api.PyObject_Unicode(space.wrap([]))) == u"[]" - assert space.unwrap(api.PyObject_Unicode(space.wrap("e"))) == u"e" + assert space.unicode_w(api.PyObject_Unicode(None)) == u"<NULL>" 
+ assert space.unicode_w(api.PyObject_Unicode(space.wrap([]))) == u"[]" + assert space.unicode_w(api.PyObject_Unicode(space.wrap("e"))) == u"e" with raises_w(space, UnicodeDecodeError): PyObject_Unicode(space, space.wrap("\xe9")) @@ -562,7 +562,7 @@ PyObject *a = PyTuple_GetItem(args, 0); PyObject *b = PyTuple_GetItem(args, 1); int res = PyObject_RichCompareBool(a, b, Py_EQ); - return PyLong_FromLong(res); + return PyLong_FromLong(res); """),]) a = float('nan') b = float('nan') diff --git a/pypy/module/cpyext/test/test_pyerrors.py b/pypy/module/cpyext/test/test_pyerrors.py --- a/pypy/module/cpyext/test/test_pyerrors.py +++ b/pypy/module/cpyext/test/test_pyerrors.py @@ -425,3 +425,15 @@ assert orig_exc_info == reset_sys_exc_info assert new_exc_info == (new_exc.__class__, new_exc, None) assert new_exc_info == new_sys_exc_info + + def test_PyErr_BadInternalCall(self): + # NB. it only seemed to fail when run with '-s'... but I think + # that it always printed stuff to stderr + module = self.import_extension('foo', [ + ("oops", "METH_NOARGS", + r''' + PyErr_BadInternalCall(); + return NULL; + '''), + ]) + raises(SystemError, module.oops) diff --git a/pypy/module/cpyext/test/test_unicodeobject.py b/pypy/module/cpyext/test/test_unicodeobject.py --- a/pypy/module/cpyext/test/test_unicodeobject.py +++ b/pypy/module/cpyext/test/test_unicodeobject.py @@ -178,7 +178,7 @@ array = rffi.cast(rffi.CWCHARP, PyUnicode_AS_DATA(space, word)) array2 = PyUnicode_AS_UNICODE(space, word) array3 = PyUnicode_AsUnicode(space, word) - for (i, char) in enumerate(space.unwrap(word)): + for (i, char) in enumerate(space.unicode_w(word)): assert array[i] == char assert array2[i] == char assert array3[i] == char @@ -216,12 +216,12 @@ def test_fromstring(self, space): s = rffi.str2charp(u'sp\x09m'.encode("utf-8")) w_res = PyUnicode_FromString(space, s) - assert space.unwrap(w_res) == u'sp\x09m' + assert space.unicode_w(w_res) == u'sp\x09m' res = PyUnicode_FromStringAndSize(space, s, 4) w_res = 
from_ref(space, res) Py_DecRef(space, res) - assert space.unwrap(w_res) == u'sp\x09m' + assert space.unicode_w(w_res) == u'sp\x09m' rffi.free_charp(s) def test_unicode_resize(self, space): @@ -256,17 +256,17 @@ u = rffi.str2charp(u'sp\x134m'.encode("utf-8")) w_u = PyUnicode_DecodeUTF8(space, u, 5, None) assert space.type(w_u) is space.w_unicode - assert space.unwrap(w_u) == u'sp\x134m' + assert space.unicode_w(w_u) == u'sp\x134m' w_u = PyUnicode_DecodeUTF8(space, u, 2, None) assert space.type(w_u) is space.w_unicode - assert space.unwrap(w_u) == 'sp' + assert space.unicode_w(w_u) == 'sp' rffi.free_charp(u) def test_encode_utf8(self, space): u = rffi.unicode2wcharp(u'sp\x09m') w_s = PyUnicode_EncodeUTF8(space, u, 4, None) - assert space.unwrap(w_s) == u'sp\x09m'.encode('utf-8') + assert space.unicode_w(w_s) == u'sp\x09m'.encode('utf-8') rffi.free_wcharp(u) def test_encode_decimal(self, space): @@ -364,18 +364,18 @@ def test_fromobject(self, space): w_u = space.wrap(u'a') assert PyUnicode_FromObject(space, w_u) is w_u - assert space.unwrap( + assert space.unicode_w( PyUnicode_FromObject(space, space.wrap('test'))) == 'test' def test_decode(self, space): b_text = rffi.str2charp('caf\x82xx') b_encoding = rffi.str2charp('cp437') - assert space.unwrap( + assert space.unicode_w( PyUnicode_Decode(space, b_text, 4, b_encoding, None)) == u'caf\xe9' w_text = PyUnicode_FromEncodedObject(space, space.wrap("test"), b_encoding, None) assert space.isinstance_w(w_text, space.w_unicode) - assert space.unwrap(w_text) == "test" + assert space.unicode_w(w_text) == "test" with raises_w(space, TypeError): PyUnicode_FromEncodedObject(space, space.wrap(u"test"), @@ -391,7 +391,8 @@ u_text = u'abcdefg' s_text = space.str_w(PyUnicode_AsEncodedString(space, space.wrap(u_text), null_charp, null_charp)) b_text = rffi.str2charp(s_text) - assert space.unwrap(PyUnicode_Decode(space, b_text, len(s_text), null_charp, null_charp)) == u_text + assert space.unicode_w(PyUnicode_Decode( + space, b_text, 
len(s_text), null_charp, null_charp)) == u_text with raises_w(space, TypeError): PyUnicode_FromEncodedObject( space, space.wrap(u_text), null_charp, None) @@ -508,7 +509,7 @@ def test_concat(self, space): w_res = PyUnicode_Concat(space, space.wrap(u'a'), space.wrap(u'b')) - assert space.unwrap(w_res) == u'ab' + assert space.unicode_w(w_res) == u'ab' def test_copy(self, space): w_x = space.wrap(u"abcd\u0660") @@ -579,29 +580,30 @@ w_format = space.wrap(u'hi %s') w_args = space.wrap((u'test',)) w_formated = PyUnicode_Format(space, w_format, w_args) - assert space.unwrap(w_formated) == space.unwrap(space.mod(w_format, w_args)) + assert (space.unicode_w(w_formated) == + space.unicode_w(space.mod(w_format, w_args))) def test_join(self, space): w_sep = space.wrap(u'<sep>') w_seq = space.wrap([u'a', u'b']) w_joined = PyUnicode_Join(space, w_sep, w_seq) - assert space.unwrap(w_joined) == u'a<sep>b' + assert space.unicode_w(w_joined) == u'a<sep>b' def test_fromordinal(self, space): w_char = PyUnicode_FromOrdinal(space, 65) - assert space.unwrap(w_char) == u'A' + assert space.unicode_w(w_char) == u'A' w_char = PyUnicode_FromOrdinal(space, 0) - assert space.unwrap(w_char) == u'\0' + assert space.unicode_w(w_char) == u'\0' w_char = PyUnicode_FromOrdinal(space, 0xFFFF) - assert space.unwrap(w_char) == u'\uFFFF' + assert space.unicode_w(w_char) == u'\uFFFF' def test_replace(self, space): w_str = space.wrap(u"abababab") w_substr = space.wrap(u"a") w_replstr = space.wrap(u"z") - assert u"zbzbabab" == space.unwrap( + assert u"zbzbabab" == space.unicode_w( PyUnicode_Replace(space, w_str, w_substr, w_replstr, 2)) - assert u"zbzbzbzb" == space.unwrap( + assert u"zbzbzbzb" == space.unicode_w( PyUnicode_Replace(space, w_str, w_substr, w_replstr, -1)) def test_tailmatch(self, space): diff --git a/pypy/module/cpyext/unicodeobject.py b/pypy/module/cpyext/unicodeobject.py --- a/pypy/module/cpyext/unicodeobject.py +++ b/pypy/module/cpyext/unicodeobject.py @@ -12,6 +12,7 @@ from 
pypy.module.cpyext.bytesobject import PyString_Check from pypy.module.sys.interp_encoding import setdefaultencoding from pypy.module._codecs.interp_codecs import CodecState +from pypy.interpreter import unicodehelper from pypy.objspace.std import unicodeobject from rpython.rlib import rstring, runicode from rpython.tool.sourcetools import func_renamer @@ -620,7 +621,7 @@ else: errors = None - result, length, byteorder = runicode.str_decode_utf_32_helper( + result, length, byteorder = unicodehelper.str_decode_utf_32_helper( string, size, errors, True, # final ? false for multiple passes? None, # errorhandler diff --git a/pypy/module/posix/test/test_posix2.py b/pypy/module/posix/test/test_posix2.py --- a/pypy/module/posix/test/test_posix2.py +++ b/pypy/module/posix/test/test_posix2.py @@ -31,9 +31,15 @@ pdir.join('file2').write("test2") pdir.join('another_longer_file_name').write("test3") mod.pdir = pdir - unicode_dir = udir.ensure('fi\xc5\x9fier.txt', dir=True) + if sys.platform == 'darwin': + # see issue https://bugs.python.org/issue31380 + unicode_dir = udir.ensure('fixc5x9fier.txt', dir=True) + file_name = 'cafxe9' + else: + unicode_dir = udir.ensure('fi\xc5\x9fier.txt', dir=True) + file_name = 'caf\xe9' unicode_dir.join('somefile').write('who cares?') - unicode_dir.join('caf\xe9').write('who knows?') + unicode_dir.join(file_name).write('who knows?') mod.unicode_dir = unicode_dir # in applevel tests, os.stat uses the CPython os.stat. 
diff --git a/pypy/module/time/interp_time.py b/pypy/module/time/interp_time.py --- a/pypy/module/time/interp_time.py +++ b/pypy/module/time/interp_time.py @@ -310,12 +310,19 @@ errno = rposix.get_saved_errno() return os.strerror(errno) +def _check_sleep_arg(space, secs): + from rpython.rlib.rfloat import isinf, isnan + if secs < 0: + raise oefmt(space.w_IOError, + "Invalid argument: negative time in sleep") + if isinf(secs) or isnan(secs): + raise oefmt(space.w_IOError, + "Invalid argument: inf or nan") + if sys.platform != 'win32': @unwrap_spec(secs=float) def sleep(space, secs): - if secs < 0: - raise oefmt(space.w_IOError, - "Invalid argument: negative time in sleep") + _check_sleep_arg(space, secs) rtime.sleep(secs) else: from rpython.rlib import rwin32 @@ -336,9 +343,7 @@ OSError(EINTR, "sleep() interrupted")) @unwrap_spec(secs=float) def sleep(space, secs): - if secs < 0: - raise oefmt(space.w_IOError, - "Invalid argument: negative time in sleep") + _check_sleep_arg(space, secs) # as decreed by Guido, only the main thread can be # interrupted. 
main_thread = space.fromcache(State).main_thread diff --git a/pypy/module/time/test/test_time.py b/pypy/module/time/test/test_time.py --- a/pypy/module/time/test/test_time.py +++ b/pypy/module/time/test/test_time.py @@ -19,6 +19,8 @@ raises(TypeError, time.sleep, "foo") time.sleep(0.12345) raises(IOError, time.sleep, -1.0) + raises(IOError, time.sleep, float('nan')) + raises(IOError, time.sleep, float('inf')) def test_clock(self): import time diff --git a/pypy/module/unicodedata/interp_ucd.py b/pypy/module/unicodedata/interp_ucd.py --- a/pypy/module/unicodedata/interp_ucd.py +++ b/pypy/module/unicodedata/interp_ucd.py @@ -259,10 +259,10 @@ result[0] = ch if not composed: # If decomposed normalization we are done - return space.newunicode(u''.join([unichr(i) for i in result[:j]])) + return self.build(space, result, stop=j) if j <= 1: - return space.newunicode(u''.join([unichr(i) for i in result[:j]])) + return self.build(space, result, stop=j) current = result[0] starter_pos = 0 @@ -310,7 +310,10 @@ result[starter_pos] = current - return space.newunicode(u''.join([unichr(i) for i in result[:next_insert]])) + return self.build(space, result, stop=next_insert) + + def build(self, space, r, stop): + return space.newunicode(u''.join([unichr(i) for i in r[:stop]])) methods = {} diff --git a/pypy/module/unicodedata/test/test_hyp.py b/pypy/module/unicodedata/test/test_hyp.py --- a/pypy/module/unicodedata/test/test_hyp.py +++ b/pypy/module/unicodedata/test/test_hyp.py @@ -10,7 +10,7 @@ def normalize(s): w_s = space.newunicode(s) w_res = ucd.normalize(space, NF_code, w_s) - return space.unwrap(w_res) + return space.unicode_w(w_res) return normalize all_forms = ['NFC', 'NFD', 'NFKC', 'NFKD'] diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py --- a/pypy/objspace/std/test/test_unicodeobject.py +++ b/pypy/objspace/std/test/test_unicodeobject.py @@ -135,6 +135,11 @@ check(u'a' + 'b', u'ab') check('a' + u'b', u'ab') + def 
test_getitem(self): + assert u'abc'[2] == 'c' + raises(IndexError, u'abc'.__getitem__, 15) + assert u'g\u0105\u015b\u0107'[2] == u'\u015b' + def test_join(self): def check(a, b): assert a == b @@ -171,6 +176,8 @@ assert u'\n\n'.splitlines() == [u'', u''] assert u'a\nb\nc'.splitlines(1) == [u'a\n', u'b\n', u'c'] assert u'\na\nb\n'.splitlines(1) == [u'\n', u'a\n', u'b\n'] + assert ((u'a' + '\xc2\x85'.decode('utf8') + u'b\n').splitlines() == + ['a', 'b']) def test_zfill(self): assert u'123'.zfill(2) == u'123' @@ -217,6 +224,7 @@ raises(ValueError, u'abc'.split, u'') raises(ValueError, 'abc'.split, u'') assert u' a b c d'.split(None, 0) == [u'a b c d'] + assert u'a\nb\u1680c'.split() == [u'a', u'b', u'c'] def test_rsplit(self): assert u"".rsplit() == [] @@ -246,6 +254,7 @@ raises(ValueError, 'abc'.rsplit, u'') assert u' a b c '.rsplit(None, 0) == [u' a b c'] assert u''.rsplit('aaa') == [u''] + assert u'a\nb\u1680c'.rsplit() == [u'a', u'b', u'c'] def test_split_rsplit_str_unicode(self): x = 'abc'.split(u'b') @@ -291,6 +300,8 @@ assert u"bROWN fOX".title() == u"Brown Fox" assert u"Brown Fox".title() == u"Brown Fox" assert u"bro!wn fox".title() == u"Bro!Wn Fox" + assert u"brow\u4321n fox".title() == u"Brow\u4321N Fox" + assert u'\ud800'.title() == u'\ud800' def test_istitle(self): assert u"".istitle() == False @@ -315,6 +326,18 @@ assert not u'\u01c5abc'.islower() assert not u'\u01c5ABC'.isupper() + def test_lower_upper(self): + assert u'a'.lower() == u'a' + assert u'A'.lower() == u'a' + assert u'\u0105'.lower() == u'\u0105' + assert u'\u0104'.lower() == u'\u0105' + assert u'\ud800'.lower() == u'\ud800' + assert u'a'.upper() == u'A' + assert u'A'.upper() == u'A' + assert u'\u0105'.upper() == u'\u0104' + assert u'\u0104'.upper() == u'\u0104' + assert u'\ud800'.upper() == u'\ud800' + def test_capitalize(self): assert u"brown fox".capitalize() == u"Brown fox" assert u' hello '.capitalize() == u' hello ' @@ -336,6 +359,8 @@ # check with Ll chars with no upper - nothing 
changes here assert (u'\u019b\u1d00\u1d86\u0221\u1fb7'.capitalize() == u'\u019b\u1d00\u1d86\u0221\u1fb7') + assert u'\ud800'.capitalize() == u'\ud800' + assert u'xx\ud800'.capitalize() == u'Xx\ud800' def test_rjust(self): s = u"abc" @@ -376,6 +401,16 @@ assert u'one!two!three!'.replace('x', '@') == u'one!two!three!' assert u'one!two!three!'.replace(u'x', '@', 2) == u'one!two!three!' assert u'abc'.replace('', u'-') == u'-a-b-c-' + assert u'\u1234'.replace(u'', '-') == u'-\u1234-' + assert u'\u0234\u5678'.replace('', u'-') == u'-\u0234-\u5678-' + assert u'\u0234\u5678'.replace('', u'-', 0) == u'\u0234\u5678' + assert u'\u0234\u5678'.replace('', u'-', 1) == u'-\u0234\u5678' + assert u'\u0234\u5678'.replace('', u'-', 2) == u'-\u0234-\u5678' + assert u'\u0234\u5678'.replace('', u'-', 3) == u'-\u0234-\u5678-' + assert u'\u0234\u5678'.replace('', u'-', 4) == u'-\u0234-\u5678-' + assert u'\u0234\u5678'.replace('', u'-', 700) == u'-\u0234-\u5678-' + assert u'\u0234\u5678'.replace('', u'-', -1) == u'-\u0234-\u5678-' + assert u'\u0234\u5678'.replace('', u'-', -42) == u'-\u0234-\u5678-' assert u'abc'.replace(u'', u'-', 3) == u'-a-b-c' assert u'abc'.replace('', '-', 0) == u'abc' assert u''.replace(u'', '') == u'' @@ -479,6 +514,9 @@ assert u''.startswith(u'a') is False assert u'x'.startswith(u'xx') is False assert u'y'.startswith(u'xx') is False + assert u'\u1234\u5678\u4321'.startswith(u'\u1234') is True + assert u'\u1234\u5678\u4321'.startswith(u'\u1234\u4321') is False + assert u'\u1234'.startswith(u'', 1, 0) is True def test_startswith_more(self): assert u'ab'.startswith(u'a', 0) is True @@ -589,7 +627,7 @@ raises(TypeError, u'hello'.translate) raises(TypeError, u'abababc'.translate, {ord('a'):''}) - def test_unicode_form_encoded_object(self): + def test_unicode_from_encoded_object(self): assert unicode('x', 'utf-8') == u'x' assert unicode('x', 'utf-8', 'strict') == u'x' @@ -634,6 +672,8 @@ assert u'\u20ac'.encode('utf-8') == '\xe2\x82\xac' assert 
u'\ud800\udc02'.encode('utf-8') == '\xf0\x90\x80\x82' assert u'\ud84d\udc56'.encode('utf-8') == '\xf0\xa3\x91\x96' + assert u'\ud800\udc02'.encode('uTf-8') == '\xf0\x90\x80\x82' + assert u'\ud84d\udc56'.encode('Utf8') == '\xf0\xa3\x91\x96' assert u'\ud800'.encode('utf-8') == '\xed\xa0\x80' assert u'\udc00'.encode('utf-8') == '\xed\xb0\x80' assert (u'\ud800\udc02'*1000).encode('utf-8') == '\xf0\x90\x80\x82'*1000 @@ -745,6 +785,7 @@ def test_index(self): assert u"rrarrrrrrrrra".index(u'a', 4, None) == 12 assert u"rrarrrrrrrrra".index(u'a', None, 6) == 2 + assert u"\u1234\u4321\u5678".index(u'\u5678', 1) == 2 def test_rindex(self): from sys import maxint @@ -754,6 +795,7 @@ assert u'abcdefghiabc'.rindex(u'abc', 0, -1) == 0 assert u'abcdefghiabc'.rindex(u'abc', -4*maxint, 4*maxint) == 9 assert u'rrarrrrrrrrra'.rindex(u'a', 4, None) == 12 + assert u"\u1234\u5678".rindex(u'\u5678') == 1 raises(ValueError, u'abcdefghiabc'.rindex, u'hib') raises(ValueError, u'defghiabc'.rindex, u'def', 1) @@ -768,12 +810,15 @@ assert u'abcdefghiabc'.rfind(u'') == 12 assert u'abcdefghiabc'.rfind(u'abcd') == 0 assert u'abcdefghiabc'.rfind(u'abcz') == -1 + assert u"\u1234\u5678".rfind(u'\u5678') == 1 def test_rfind_corner_case(self): assert u'abc'.rfind('', 4) == -1 def test_find_index_str_unicode(self): - assert 'abcdefghiabc'.find(u'bc') == 1 + assert u'abcdefghiabc'.find(u'bc') == 1 + assert u'ab\u0105b\u0107'.find('b', 2) == 3 + assert u'ab\u0105b\u0107'.find('b', 0, 1) == -1 assert 'abcdefghiabc'.rfind(u'abc') == 9 raises(UnicodeDecodeError, '\x80'.find, u'') raises(UnicodeDecodeError, '\x80'.rfind, u'') @@ -781,6 +826,7 @@ assert 'abcdefghiabc'.rindex(u'abc') == 9 raises(UnicodeDecodeError, '\x80'.index, u'') raises(UnicodeDecodeError, '\x80'.rindex, u'') + assert u"\u1234\u5678".find(u'\u5678') == 1 def test_count(self): assert u"".count(u"x") ==0 @@ -807,6 +853,7 @@ def test_swapcase(self): assert u'\xe4\xc4\xdf'.swapcase() == u'\xc4\xe4\xdf' + assert u'\ud800'.swapcase() == u'\ud800' 
def test_buffer(self): buf = buffer(u'XY') @@ -878,16 +925,31 @@ def test_getslice(self): assert u'123456'.__getslice__(1, 5) == u'2345' - s = u"abc" - assert s[:] == "abc" - assert s[1:] == "bc" - assert s[:2] == "ab" - assert s[1:2] == "b" - assert s[-2:] == "bc" - assert s[:-1] == "ab" - assert s[-2:2] == "b" - assert s[1:-1] == "b" - assert s[-2:-1] == "b" + s = u"\u0105b\u0107" + assert s[:] == u"\u0105b\u0107" + assert s[1:] == u"b\u0107" + assert s[:2] == u"\u0105b" + assert s[1:2] == u"b" + assert s[-2:] == u"b\u0107" + assert s[:-1] == u"\u0105b" + assert s[-2:2] == u"b" + assert s[1:-1] == u"b" + assert s[-2:-1] == u"b" + + def test_getitem_slice(self): + assert u'123456'.__getitem__(slice(1, 5)) == u'2345' + s = u"\u0105b\u0107" + assert s[slice(3)] == u"\u0105b\u0107" + assert s[slice(1, 3)] == u"b\u0107" + assert s[slice(2)] == u"\u0105b" + assert s[slice(1,2)] == u"b" + assert s[slice(-2,3)] == u"b\u0107" + assert s[slice(-1)] == u"\u0105b" + assert s[slice(-2,2)] == u"b" + assert s[slice(1,-1)] == u"b" + assert s[slice(-2,-1)] == u"b" + assert u"abcde"[::2] == u"ace" + assert u"\u0105\u0106\u0107abcd"[::2] == u"\u0105\u0107bd" def test_no_len_on_str_iter(self): iterable = u"hello" diff --git a/pypy/tool/release/force-builds.py b/pypy/tool/release/force-builds.py --- a/pypy/tool/release/force-builds.py +++ b/pypy/tool/release/force-builds.py @@ -29,7 +29,6 @@ 'pypy-c-jit-macosx-x86-64', 'pypy-c-jit-win-x86-32', 'pypy-c-jit-linux-s390x', - 'build-pypy-c-jit-linux-armhf-raring', 'build-pypy-c-jit-linux-armhf-raspbian', 'build-pypy-c-jit-linux-armel', ] diff --git a/pypy/tool/release/repackage.sh b/pypy/tool/release/repackage.sh --- a/pypy/tool/release/repackage.sh +++ b/pypy/tool/release/repackage.sh @@ -23,7 +23,7 @@ # Download latest builds from the buildmaster, rename the top # level directory, and repackage ready to be uploaded to bitbucket -for plat in linux linux64 linux-armhf-raspbian linux-armhf-raring linux-armel osx64 s390x +for plat in linux 
linux64 linux-armhf-raspbian linux-armel osx64 s390x do echo downloading package for $plat if wget -q --show-progress http://buildbot.pypy.org/nightly/$branchname/pypy-c-jit-latest-$plat.tar.bz2 diff --git a/rpython/annotator/bookkeeper.py b/rpython/annotator/bookkeeper.py --- a/rpython/annotator/bookkeeper.py +++ b/rpython/annotator/bookkeeper.py @@ -194,13 +194,14 @@ listdef.generalize_range_step(flags['range_step']) return SomeList(listdef) - def getdictdef(self, is_r_dict=False, force_non_null=False): + def getdictdef(self, is_r_dict=False, force_non_null=False, simple_hash_eq=False): """Get the DictDef associated with the current position.""" try: dictdef = self.dictdefs[self.position_key] except KeyError: dictdef = DictDef(self, is_r_dict=is_r_dict, - force_non_null=force_non_null) + force_non_null=force_non_null, + simple_hash_eq=simple_hash_eq) self.dictdefs[self.position_key] = dictdef return dictdef diff --git a/rpython/annotator/builtin.py b/rpython/annotator/builtin.py --- a/rpython/annotator/builtin.py +++ b/rpython/annotator/builtin.py @@ -237,22 +237,30 @@ return SomeInstance(clsdef) @analyzer_for(rpython.rlib.objectmodel.r_dict) -def robjmodel_r_dict(s_eqfn, s_hashfn, s_force_non_null=None): +def robjmodel_r_dict(s_eqfn, s_hashfn, s_force_non_null=None, s_simple_hash_eq=None): + return _r_dict_helper(SomeDict, s_eqfn, s_hashfn, s_force_non_null, s_simple_hash_eq) + +@analyzer_for(rpython.rlib.objectmodel.r_ordereddict) +def robjmodel_r_ordereddict(s_eqfn, s_hashfn, s_force_non_null=None, s_simple_hash_eq=None): + return _r_dict_helper(SomeOrderedDict, s_eqfn, s_hashfn, + s_force_non_null, s_simple_hash_eq) + +def _r_dict_helper(cls, s_eqfn, s_hashfn, s_force_non_null, s_simple_hash_eq): if s_force_non_null is None: force_non_null = False else: assert s_force_non_null.is_constant() force_non_null = s_force_non_null.const + if s_simple_hash_eq is None: + simple_hash_eq = False + else: + assert s_simple_hash_eq.is_constant() + simple_hash_eq = 
s_simple_hash_eq.const dictdef = getbookkeeper().getdictdef(is_r_dict=True, - force_non_null=force_non_null) + force_non_null=force_non_null, + simple_hash_eq=simple_hash_eq) dictdef.dictkey.update_rdict_annotations(s_eqfn, s_hashfn) - return SomeDict(dictdef) - -@analyzer_for(rpython.rlib.objectmodel.r_ordereddict) -def robjmodel_r_ordereddict(s_eqfn, s_hashfn): - dictdef = getbookkeeper().getdictdef(is_r_dict=True) - dictdef.dictkey.update_rdict_annotations(s_eqfn, s_hashfn) - return SomeOrderedDict(dictdef) + return cls(dictdef) @analyzer_for(rpython.rlib.objectmodel.hlinvoke) def robjmodel_hlinvoke(s_repr, s_llcallable, *args_s): diff --git a/rpython/annotator/dictdef.py b/rpython/annotator/dictdef.py --- a/rpython/annotator/dictdef.py +++ b/rpython/annotator/dictdef.py @@ -81,12 +81,14 @@ def __init__(self, bookkeeper, s_key = s_ImpossibleValue, s_value = s_ImpossibleValue, is_r_dict = False, - force_non_null = False): + force_non_null = False, + simple_hash_eq = False): self.dictkey = DictKey(bookkeeper, s_key, is_r_dict) self.dictkey.itemof[self] = True self.dictvalue = DictValue(bookkeeper, s_value) self.dictvalue.itemof[self] = True self.force_non_null = force_non_null + self.simple_hash_eq = simple_hash_eq def read_key(self, position_key): self.dictkey.read_locations.add(position_key) diff --git a/rpython/jit/metainterp/optimizeopt/intbounds.py b/rpython/jit/metainterp/optimizeopt/intbounds.py --- a/rpython/jit/metainterp/optimizeopt/intbounds.py +++ b/rpython/jit/metainterp/optimizeopt/intbounds.py @@ -25,19 +25,6 @@ return (1 << ((byte_size << 3) - 1)) - 1 -IS_64_BIT = sys.maxint > 2**32 - -def next_pow2_m1(n): - """Calculate next power of 2 greater than n minus one.""" - n |= n >> 1 - n |= n >> 2 - n |= n >> 4 - n |= n >> 8 - n |= n >> 16 - if IS_64_BIT: - n |= n >> 32 - return n - class OptIntBounds(Optimization): """Keeps track of the bounds placed on integers by guards and remove @@ -50,7 +37,7 @@ return dispatch_postprocess(self, op) def 
propagate_bounds_backward(self, box): - # FIXME: This takes care of the instruction where box is the reuslt + # FIXME: This takes care of the instruction where box is the result # but the bounds produced by all instructions where box is # an argument might also be tighten b = self.getintbound(box) @@ -91,14 +78,8 @@ b1 = self.getintbound(v1) v2 = self.get_box_replacement(op.getarg(1)) b2 = self.getintbound(v2) - if b1.known_ge(IntBound(0, 0)) and \ - b2.known_ge(IntBound(0, 0)): - r = self.getintbound(op) - if b1.has_upper and b2.has_upper: - mostsignificant = b1.upper | b2.upper - r.intersect(IntBound(0, next_pow2_m1(mostsignificant))) - else: - r.make_ge(IntBound(0, 0)) + b = b1.or_bound(b2) + self.getintbound(op).intersect(b) optimize_INT_OR = optimize_INT_OR_or_XOR optimize_INT_XOR = optimize_INT_OR_or_XOR @@ -112,15 +93,8 @@ def postprocess_INT_AND(self, op): b1 = self.getintbound(op.getarg(0)) b2 = self.getintbound(op.getarg(1)) - r = self.getintbound(op) - pos1 = b1.known_ge(IntBound(0, 0)) - pos2 = b2.known_ge(IntBound(0, 0)) - if pos1 or pos2: - r.make_ge(IntBound(0, 0)) - if pos1: - r.make_le(b1) - if pos2: - r.make_le(b2) + b = b1.and_bound(b2) + self.getintbound(op).intersect(b) def optimize_INT_SUB(self, op): return self.emit(op) @@ -211,16 +185,10 @@ r.intersect(b1.py_div_bound(b2)) def post_call_INT_PY_MOD(self, op): + b1 = self.getintbound(op.getarg(1)) b2 = self.getintbound(op.getarg(2)) - if b2.is_constant(): - val = b2.getint() - r = self.getintbound(op) - if val >= 0: # with Python's modulo: 0 <= (x % pos) < pos - r.make_ge(IntBound(0, 0)) - r.make_lt(IntBound(val, val)) - else: # with Python's modulo: neg < (x % neg) <= 0 - r.make_gt(IntBound(val, val)) - r.make_le(IntBound(0, 0)) + r = self.getintbound(op) + r.intersect(b1.mod_bound(b2)) def optimize_INT_LSHIFT(self, op): return self.emit(op) @@ -436,7 +404,7 @@ def optimize_INT_FORCE_GE_ZERO(self, op): b = self.getintbound(op.getarg(0)) - if b.known_ge(IntBound(0, 0)): + if 
b.known_nonnegative(): self.make_equal_to(op, op.getarg(0)) else: return self.emit(op) @@ -647,7 +615,7 @@ if r.is_constant(): if r.getint() == valnonzero: b1 = self.getintbound(op.getarg(0)) - if b1.known_ge(IntBound(0, 0)): + if b1.known_nonnegative(): b1.make_gt(IntBound(0, 0)) self.propagate_bounds_backward(op.getarg(0)) elif r.getint() == valzero: diff --git a/rpython/jit/metainterp/optimizeopt/intutils.py b/rpython/jit/metainterp/optimizeopt/intutils.py --- a/rpython/jit/metainterp/optimizeopt/intutils.py +++ b/rpython/jit/metainterp/optimizeopt/intutils.py @@ -12,6 +12,19 @@ MAXINT = maxint MININT = -maxint - 1 +IS_64_BIT = sys.maxint > 2**32 + +def next_pow2_m1(n): + """Calculate next power of 2 greater than n minus one.""" + n |= n >> 1 + n |= n >> 2 + n |= n >> 4 + n |= n >> 8 + n |= n >> 16 + if IS_64_BIT: + n |= n >> 32 + return n + class IntBound(AbstractInfo): _attrs_ = ('has_upper', 'has_lower', 'upper', 'lower') @@ -92,6 +105,9 @@ def known_ge(self, other): return other.known_le(self) + def known_nonnegative(self): + return self.has_lower and 0 <= self.lower + def intersect(self, other): r = False @@ -192,10 +208,22 @@ else: return IntUnbounded() + def mod_bound(self, other): + r = IntUnbounded() + if other.is_constant(): + val = other.getint() + if val >= 0: # with Python's modulo: 0 <= (x % pos) < pos + r.make_ge(IntBound(0, 0)) + r.make_lt(IntBound(val, val)) + else: # with Python's modulo: neg < (x % neg) <= 0 + r.make_gt(IntBound(val, val)) + r.make_le(IntBound(0, 0)) + return r + def lshift_bound(self, other): if self.has_upper and self.has_lower and \ other.has_upper and other.has_lower and \ - other.known_ge(IntBound(0, 0)) and \ + other.known_nonnegative() and \ other.known_lt(IntBound(LONG_BIT, LONG_BIT)): try: vals = (ovfcheck(self.upper << other.upper), @@ -211,7 +239,7 @@ def rshift_bound(self, other): if self.has_upper and self.has_lower and \ other.has_upper and other.has_lower and \ - other.known_ge(IntBound(0, 0)) and \ + 
other.known_nonnegative() and \ other.known_lt(IntBound(LONG_BIT, LONG_BIT)): vals = (self.upper >> other.upper, self.upper >> other.lower, @@ -221,7 +249,32 @@ else: return IntUnbounded() + def and_bound(self, other): + pos1 = self.known_nonnegative() + pos2 = other.known_nonnegative() + r = IntUnbounded() + if pos1 or pos2: + r.make_ge(IntBound(0, 0)) + if pos1: + r.make_le(self) + if pos2: + r.make_le(other) + return r + + def or_bound(self, other): + r = IntUnbounded() + if self.known_nonnegative() and \ + other.known_nonnegative(): + if self.has_upper and other.has_upper: + mostsignificant = self.upper | other.upper + r.intersect(IntBound(0, next_pow2_m1(mostsignificant))) + else: + r.make_ge(IntBound(0, 0)) + return r + def contains(self, val): + if not we_are_translated(): + assert not isinstance(val, long) if not isinstance(val, int): if ((not self.has_lower or self.lower == MININT) and not self.has_upper or self.upper == MAXINT): @@ -282,7 +335,7 @@ guards.append(op) def is_bool(self): - return (self.bounded() and self.known_ge(ConstIntBound(0)) and + return (self.bounded() and self.known_nonnegative() and self.known_le(ConstIntBound(1))) def make_bool(self): @@ -297,7 +350,7 @@ if self.known_gt(IntBound(0, 0)) or \ self.known_lt(IntBound(0, 0)): return INFO_NONNULL - if self.known_ge(IntBound(0, 0)) and \ + if self.known_nonnegative() and \ self.known_le(IntBound(0, 0)): return INFO_NULL return INFO_UNKNOWN diff --git a/rpython/jit/metainterp/optimizeopt/optimizer.py b/rpython/jit/metainterp/optimizeopt/optimizer.py --- a/rpython/jit/metainterp/optimizeopt/optimizer.py +++ b/rpython/jit/metainterp/optimizeopt/optimizer.py @@ -273,7 +273,6 @@ self.jitdriver_sd = jitdriver_sd self.cpu = metainterp_sd.cpu self.interned_refs = self.cpu.ts.new_ref_dict() - self.interned_ints = {} self.resumedata_memo = resume.ResumeDataLoopMemo(metainterp_sd) self.pendingfields = None # set temporarily to a list, normally by # heap.py, as we're about to generate a guard diff 
--git a/rpython/jit/metainterp/optimizeopt/test/test_intbound.py b/rpython/jit/metainterp/optimizeopt/test/test_intbound.py --- a/rpython/jit/metainterp/optimizeopt/test/test_intbound.py +++ b/rpython/jit/metainterp/optimizeopt/test/test_intbound.py @@ -1,12 +1,34 @@ from rpython.jit.metainterp.optimizeopt.intutils import IntBound, IntUpperBound, \ - IntLowerBound, IntUnbounded -from rpython.jit.metainterp.optimizeopt.intbounds import next_pow2_m1 + IntLowerBound, IntUnbounded, next_pow2_m1 from copy import copy import sys -from rpython.rlib.rarithmetic import LONG_BIT +from rpython.rlib.rarithmetic import LONG_BIT, ovfcheck -def bound(a,b): +from hypothesis import given, strategies + +special_values = ( + range(-100, 100) + + [2 ** i for i in range(1, LONG_BIT)] + + [-2 ** i for i in range(1, LONG_BIT)] + + [2 ** i - 1 for i in range(1, LONG_BIT)] + + [-2 ** i - 1 for i in range(1, LONG_BIT)] + + [2 ** i + 1 for i in range(1, LONG_BIT)] + + [-2 ** i + 1 for i in range(1, LONG_BIT)] + + [sys.maxint, -sys.maxint-1]) + +special_values = strategies.sampled_from( + [int(v) for v in special_values if type(int(v)) is int]) + +ints = strategies.builds( + int, # strategies.integers sometimes returns a long? 
+ special_values | strategies.integers( + min_value=int(-sys.maxint-1), max_value=sys.maxint)) + +ints_or_none = strategies.none() | ints + + +def bound(a, b): if a is None and b is None: return IntUnbounded() elif a is None: @@ -14,11 +36,55 @@ elif b is None: return IntLowerBound(a) else: - return IntBound(a,b) + return IntBound(a, b) def const(a): return bound(a,a) + +def build_bound_with_contained_number(a, b, c): + a, b, c = sorted([a, b, c]) + r = bound(a, c) + assert r.contains(b) + return r, b + +bound_with_contained_number = strategies.builds( + build_bound_with_contained_number, + ints_or_none, + ints_or_none, + ints +) + +unbounded = strategies.builds( + lambda x: (bound(None, None), int(x)), + ints +) + +lower_bounded = strategies.builds( + lambda x, y: (bound(min(x, y), None), max(x, y)), + ints, + ints _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit