Author: Armin Rigo <ar...@tunes.org>
Branch: py3.6
Changeset: r97862:34cc698bbcd1
Date: 2019-10-25 15:34 +0200
http://bitbucket.org/pypy/pypy/changeset/34cc698bbcd1/
Log: hg merge default This includes the changes to unicodehelper._str_decode_utf8_slowpath(). If these changes were not meant to be merged, just revert that part. diff --git a/LICENSE b/LICENSE --- a/LICENSE +++ b/LICENSE @@ -3,10 +3,11 @@ License ======= -Except when otherwise stated (look for LICENSE files in directories or -information at the beginning of each file) all software and documentation in -the 'rpython', 'pypy', 'ctype_configure', 'dotviewer', 'demo', 'lib_pypy', -'py', and '_pytest' directories is licensed as follows: +Except when otherwise stated (look for LICENSE files in directories +or information at the beginning of each file) all software and +documentation in the 'rpython', 'pypy', 'ctype_configure', 'dotviewer', +'demo', 'extra_tests', 'include', 'lib_pypy', 'py', and '_pytest' +directories is licensed as follows: The MIT License diff --git a/README.rst b/README.rst --- a/README.rst +++ b/README.rst @@ -9,15 +9,15 @@ The home page for the interpreter is: - http://pypy.org/ + https://pypy.org/ If you want to help developing PyPy, this documentation might help you: - http://doc.pypy.org/ + https://doc.pypy.org/ More documentation about the RPython framework can be found here: - http://rpython.readthedocs.io/ + https://rpython.readthedocs.io/ The source for the documentation is in the pypy/doc directory. @@ -25,7 +25,7 @@ Using PyPy instead of CPython ----------------------------- -Please read the information at http://pypy.org/ to find the correct way to +Please read the information at https://pypy.org/ to find the correct way to download and use PyPy as an alternative to CPython. @@ -36,7 +36,7 @@ interpreter. It is time-consuming and requires significant computing resources. More information can be found here: - http://doc.pypy.org/en/latest/build.html + https://doc.pypy.org/en/latest/build.html Enjoy and send us feedback! 
diff --git a/lib-python/2.7/ensurepip/_bundled/pip-19.2.3-py2.py3-none-any.whl b/lib-python/2.7/ensurepip/_bundled/pip-19.2.3-py2.py3-none-any.whl new file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..8118df8ac1940f8c6cb410fbc18e5fae59872b95 GIT binary patch [cut] diff --git a/lib-python/2.7/ensurepip/_bundled/setuptools-41.2.0-py2.py3-none-any.whl b/lib-python/2.7/ensurepip/_bundled/setuptools-41.2.0-py2.py3-none-any.whl new file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..82df6f63f4ee97380af0a29d8825ae775333b86d GIT binary patch [cut] diff --git a/lib_pypy/_cffi_ssl/_stdssl/__init__.py b/lib_pypy/_cffi_ssl/_stdssl/__init__.py --- a/lib_pypy/_cffi_ssl/_stdssl/__init__.py +++ b/lib_pypy/_cffi_ssl/_stdssl/__init__.py @@ -2,8 +2,18 @@ import time import _thread import weakref -from _pypy_openssl import ffi -from _pypy_openssl import lib + +try: + from _pypy_openssl import ffi + from _pypy_openssl import lib +except ImportError as e: + import os + msg = "\n\nThe _ssl cffi module either doesn't exist or is incompatible with your machine's shared libraries.\n" + \ + "If you have a compiler installed, you can try to rebuild it by running:\n" + \ + "cd %s\n" % os.path.abspath(os.path.dirname(os.path.dirname(__file__))) + \ + "%s _ssl_build.py\n" % sys.executable + raise ImportError(str(e) + msg) + from _cffi_ssl._stdssl.certificate import (_test_decode_cert, _decode_certificate, _certificate_to_der) from _cffi_ssl._stdssl.utility import (_str_with_len, _bytes_with_len, diff --git a/pypy/config/pypyoption.py b/pypy/config/pypyoption.py --- a/pypy/config/pypyoption.py +++ b/pypy/config/pypyoption.py @@ -41,7 +41,7 @@ "_multibytecodec", "_continuation", "_cffi_backend", "_csv", "_pypyjson", "_posixsubprocess", "_cppyy", # "micronumpy", "_jitlog", - #" _ssl", "_hashlib", "crypt" + # "_hashlib", "crypt" ]) import rpython.rlib.rvmprof.cintf diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ 
b/pypy/doc/whatsnew-head.rst @@ -9,3 +9,10 @@ Fix segfault when calling descr-methods with no arguments +.. branch: https-readme + +Convert http -> https in README.rst + +.. branch: license-update + +Update list directories in LICENSE diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -7,6 +7,7 @@ from rpython.rlib.rarithmetic import r_uint, intmask from rpython.rtyper.lltypesystem import rffi from pypy.module.unicodedata.interp_ucd import unicodedb +from rpython.rlib import runicode @specialize.memo() def decode_error_handler(space): @@ -56,7 +57,6 @@ def fsdecode(space, w_string): from pypy.module._codecs import interp_codecs - from rpython.rlib import runicode state = space.fromcache(interp_codecs.CodecState) errorhandler=state.decode_error_handler if _WIN32: @@ -368,7 +368,6 @@ def str_decode_mbcs(s, errors, final, errorhandler, force_ignore=True): slen = len(s) - from rpython.rlib import runicode res, size = runicode.str_decode_mbcs(s, slen, errors, final=final, errorhandler=errorhandler, force_ignore=force_ignore) res_utf8 = runicode.unicode_encode_utf_8(res, size, 'strict') @@ -389,139 +388,150 @@ """ if errors is None: errors = 'strict' - slen = len(s) - res = StringBuilder(slen) + size = len(s) + result = StringBuilder(size) pos = 0 - end = len(s) - while pos < end: + while pos < size: ordch1 = ord(s[pos]) # fast path for ASCII + # XXX maybe use a while loop here if ordch1 <= 0x7F: pos += 1 - res.append(chr(ordch1)) + result.append(chr(ordch1)) continue - if ordch1 <= 0xC1: - r, pos, rettype = errorhandler(errors, "utf-8", "invalid start byte", - s, pos, pos + 1) - res.append(r) - continue + n = ord(runicode._utf8_code_length[ordch1 - 0x80]) + if pos + n > size: + if not final: + break + # argh, this obscure block of code is mostly a copy of + # what follows :-( + charsleft = size - pos - 1 # either 0, 1, 2 + # note: when we get the 
'unexpected end of data' we need + # to care about the pos returned; it can be lower than size, + # in case we need to continue running this loop + if not charsleft: + # there's only the start byte and nothing else + r, pos, rettype = errorhandler(errors, 'utf-8', + 'unexpected end of data', + s, pos, pos+1) + result.append(r) + continue + ordch2 = ord(s[pos+1]) + if n == 3: + # 3-bytes seq with only a continuation byte + if rutf8._invalid_byte_2_of_3(ordch1, ordch2, allow_surrogates): + # second byte invalid, take the first and continue + r, pos, rettype = errorhandler(errors, 'utf-8', + 'invalid continuation byte', + s, pos, pos+1) + result.append(r) + continue + else: + # second byte valid, but third byte missing + r, pos, rettype = errorhandler(errors, 'utf-8', + 'unexpected end of data', + s, pos, pos+2) + result.append(r) + continue + elif n == 4: + # 4-bytes seq with 1 or 2 continuation bytes + if rutf8._invalid_byte_2_of_4(ordch1, ordch2): + # second byte invalid, take the first and continue + r, pos, rettype = errorhandler(errors, 'utf-8', + 'invalid continuation byte', + s, pos, pos+1) + result.append(r) + continue + elif charsleft == 2 and rutf8._invalid_byte_3_of_4(ord(s[pos+2])): + # third byte invalid, take the first two and continue + r, pos, rettype = errorhandler(errors, 'utf-8', + 'invalid continuation byte', + s, pos, pos+2) + result.append(r) + continue + else: + # there's only 1 or 2 valid cb, but the others are missing + r, pos, rettype = errorhandler(errors, 'utf-8', + 'unexpected end of data', + s, pos, pos+charsleft+1) + result.append(r) + continue + raise AssertionError("unreachable") - pos += 1 + if n == 0: + r, pos, rettype = errorhandler(errors, 'utf-8', + 'invalid start byte', + s, pos, pos+1) + result.append(r) - if ordch1 <= 0xDF: - if pos >= end: - if not final: - pos -= 1 - break - r, pos, rettype = errorhandler(errors, "utf-8", "unexpected end of data", - s, pos - 1, pos) - res.append(r) - continue - ordch2 = ord(s[pos]) + elif n 
== 1: + assert 0, "ascii should have gone through the fast path" + elif n == 2: + ordch2 = ord(s[pos+1]) if rutf8._invalid_byte_2_of_2(ordch2): - r, pos, rettype = errorhandler(errors, "utf-8", "invalid continuation byte", - s, pos - 1, pos) - res.append(r) + r, pos, rettype = errorhandler(errors, 'utf-8', + 'invalid continuation byte', + s, pos, pos+1) + result.append(r) continue # 110yyyyy 10zzzzzz -> 00000000 00000yyy yyzzzzzz - pos += 1 - res.append(chr(ordch1)) - res.append(chr(ordch2)) - continue + result.append(chr(ordch1)) + result.append(chr(ordch2)) + pos += 2 - if ordch1 <= 0xEF: - if (pos + 2) > end: - if not final: - pos -= 1 - break - if (pos) < end and rutf8._invalid_byte_2_of_3(ordch1, - ord(s[pos]), allow_surrogates): - msg = "invalid continuation byte" - r, pos, rettype = errorhandler(errors, "utf-8", msg, s, - pos - 1, pos) - else: - msg = "unexpected end of data" - r, pos, rettype = errorhandler(errors, "utf-8", msg, s, - pos - 1, pos) - pos = end - res.append(r) - continue - ordch2 = ord(s[pos]) - ordch3 = ord(s[pos + 1]) - + elif n == 3: + ordch2 = ord(s[pos+1]) + ordch3 = ord(s[pos+2]) if rutf8._invalid_byte_2_of_3(ordch1, ordch2, allow_surrogates): - r, pos, rettype = errorhandler(errors, "utf-8", "invalid continuation byte", - s, pos - 1, pos) - res.append(r) + r, pos, rettype = errorhandler(errors, 'utf-8', + 'invalid continuation byte', + s, pos, pos+1) + result.append(r) continue elif rutf8._invalid_byte_3_of_3(ordch3): - r, pos, rettype = errorhandler(errors, "utf-8", "invalid continuation byte", - s, pos - 1, pos + 1) - res.append(r) + r, pos, rettype = errorhandler(errors, 'utf-8', + 'invalid continuation byte', + s, pos, pos+2) + result.append(r) continue - pos += 2 + # 1110xxxx 10yyyyyy 10zzzzzz -> 00000000 xxxxyyyy yyzzzzzz + result.append(chr(ordch1)) + result.append(chr(ordch2)) + result.append(chr(ordch3)) + pos += 3 - # 1110xxxx 10yyyyyy 10zzzzzz -> 00000000 xxxxyyyy yyzzzzzz - res.append(chr(ordch1)) - res.append(chr(ordch2)) 
- res.append(chr(ordch3)) - continue - - if ordch1 <= 0xF4: - if (pos + 3) > end: - if not final: - pos -= 1 - break - if pos < end and rutf8._invalid_byte_2_of_4(ordch1, ord(s[pos])): - msg = "invalid continuation byte" - r, pos, rettype = errorhandler(errors, "utf-8", msg, s, - pos - 1, pos) - elif pos + 1 < end and rutf8._invalid_byte_3_of_4(ord(s[pos + 1])): - msg = "invalid continuation byte" - pos += 1 - r, pos, rettype = errorhandler(errors, "utf-8", msg, s, - pos - 2, pos) - else: - msg = "unexpected end of data" - r, pos, rettype = errorhandler(errors, "utf-8", msg, s, - pos - 1, pos) - pos = end - res.append(r) - continue - ordch2 = ord(s[pos]) - ordch3 = ord(s[pos + 1]) - ordch4 = ord(s[pos + 2]) + elif n == 4: + ordch2 = ord(s[pos+1]) + ordch3 = ord(s[pos+2]) + ordch4 = ord(s[pos+3]) if rutf8._invalid_byte_2_of_4(ordch1, ordch2): - r, pos, rettype = errorhandler(errors, "utf-8", "invalid continuation byte", - s, pos - 1, pos) - res.append(r) + r, pos, rettype = errorhandler(errors, 'utf-8', + 'invalid continuation byte', + s, pos, pos+1) + result.append(r) continue elif rutf8._invalid_byte_3_of_4(ordch3): - r, pos, rettype = errorhandler(errors, "utf-8", "invalid continuation byte", - s, pos - 1, pos + 1) - res.append(r) + r, pos, rettype = errorhandler(errors, 'utf-8', + 'invalid continuation byte', + s, pos, pos+2) + result.append(r) continue elif rutf8._invalid_byte_4_of_4(ordch4): - r, pos, rettype = errorhandler(errors, "utf-8", "invalid continuation byte", - s, pos - 1, pos + 2) - res.append(r) + r, pos, rettype = errorhandler(errors, 'utf-8', + 'invalid continuation byte', + s, pos, pos+3) + result.append(r) continue + # 11110www 10xxxxxx 10yyyyyy 10zzzzzz -> 000wwwxx xxxxyyyy yyzzzzzz + result.append(chr(ordch1)) + result.append(chr(ordch2)) + result.append(chr(ordch3)) + result.append(chr(ordch4)) + pos += 4 - pos += 3 - # 11110www 10xxxxxx 10yyyyyy 10zzzzzz -> 000wwwxx xxxxyyyy yyzzzzzz - res.append(chr(ordch1)) - res.append(chr(ordch2)) - 
res.append(chr(ordch3)) - res.append(chr(ordch4)) - continue - - r, pos, rettype = errorhandler(errors, "utf-8", "invalid start byte", - s, pos - 1, pos) - res.append(r) - - r = res.build() + r = result.build() return r, rutf8.check_utf8(r, True), pos hexdigits = "0123456789ABCDEFabcdef" diff --git a/pypy/module/_codecs/test/test_codecs.py b/pypy/module/_codecs/test/test_codecs.py --- a/pypy/module/_codecs/test/test_codecs.py +++ b/pypy/module/_codecs/test/test_codecs.py @@ -1447,3 +1447,17 @@ assert res == 52 raises(TypeError, u"abc".encode, "test.mynontextenc") raises(TypeError, b"abc".decode, "test.mynontextenc") + + def test_last_byte_handler(self): + # issue bb-2389 + import _codecs + _codecs.register_error('custom_replace', lambda exc: (u'\ufffd', exc.start+1)) + for s, res in ((b"WORD\xe3\xab", + (u'WORD\ufffd\ufffd', u'WORD\ufffd')), + (b"\xef\xbb\xbfWORD\xe3\xabWORD2", + (u'\ufeffWORD\ufffd\ufffdWORD2', + u'\ufeffWORD\ufffdWORD2'))): + r = s.decode('utf8', 'replace') + assert r == res[1] + r = s.decode('utf8', 'custom_replace') + assert r == res[0] diff --git a/pypy/module/sys/vm.py b/pypy/module/sys/vm.py --- a/pypy/module/sys/vm.py +++ b/pypy/module/sys/vm.py @@ -66,6 +66,12 @@ from rpython.rlib.rgc import increase_root_stack_depth if new_limit <= 0: raise oefmt(space.w_ValueError, "recursion limit must be positive") + # Some programs use very large values to mean "don't check, I want to + # use as much as possible and then segfault". Add a silent upper bound + # of 10**6 here, because huge values cause huge shadowstacks to be + # allocated (or MemoryErrors). 
+ if new_limit > 1000000: + new_limit = 1000000 try: _stack_set_length_fraction(new_limit * 0.001) _stack_check_noinline() diff --git a/pypy/objspace/std/test/test_newformat.py b/pypy/objspace/std/test/test_newformat.py --- a/pypy/objspace/std/test/test_newformat.py +++ b/pypy/objspace/std/test/test_newformat.py @@ -210,6 +210,13 @@ fmtstr = self.s("{:[XYZ}") assert fmtstr.format(Foo()) == "<<%r>>" % (self.s("[XYZ"),) + def test_issue3100(self): + class Foo: + def __format__(self, f): + return '<<%r>>' % (f,) + fmtstr = self.s("{:[XYZ}") + assert fmtstr.format(Foo()) == "<<%r>>" % (self.s("[XYZ"),) + class AppTestUnicodeFormat(BaseStringFormatTests): def setup_class(cls): _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit