Author: Armin Rigo <ar...@tunes.org>
Branch: py3.6
Changeset: r97862:34cc698bbcd1
Date: 2019-10-25 15:34 +0200
http://bitbucket.org/pypy/pypy/changeset/34cc698bbcd1/

Log:    hg merge default

        This includes the changes to
        unicodehelper._str_decode_utf8_slowpath(). If these changes were not
        meant to be merged, just revert that part.

diff --git a/LICENSE b/LICENSE
--- a/LICENSE
+++ b/LICENSE
@@ -3,10 +3,11 @@
 License
 =======
 
-Except when otherwise stated (look for LICENSE files in directories or
-information at the beginning of each file) all software and documentation in
-the 'rpython', 'pypy', 'ctype_configure', 'dotviewer', 'demo', 'lib_pypy',
-'py', and '_pytest' directories is licensed as follows:
+Except when otherwise stated (look for LICENSE files in directories
+or information at the beginning of each file) all software and
+documentation in the 'rpython', 'pypy', 'ctype_configure', 'dotviewer',
+'demo', 'extra_tests', 'include', 'lib_pypy', 'py', and '_pytest'
+directories is licensed as follows:
 
     The MIT License
 
diff --git a/README.rst b/README.rst
--- a/README.rst
+++ b/README.rst
@@ -9,15 +9,15 @@
 
 The home page for the interpreter is:
 
-    http://pypy.org/
+    https://pypy.org/
 
 If you want to help developing PyPy, this documentation might help you:
 
-    http://doc.pypy.org/
+    https://doc.pypy.org/
 
 More documentation about the RPython framework can be found here:
 
-    http://rpython.readthedocs.io/
+    https://rpython.readthedocs.io/
 
 The source for the documentation is in the pypy/doc directory.
 
@@ -25,7 +25,7 @@
 Using PyPy instead of CPython
 -----------------------------
 
-Please read the information at http://pypy.org/ to find the correct way to
+Please read the information at https://pypy.org/ to find the correct way to
 download and use PyPy as an alternative to CPython. 
 
 
@@ -36,7 +36,7 @@
 interpreter. It is time-consuming and requires significant computing resources.
 More information can be found here:
 
-    http://doc.pypy.org/en/latest/build.html
+    https://doc.pypy.org/en/latest/build.html
 
 Enjoy and send us feedback!
 
diff --git a/lib-python/2.7/ensurepip/_bundled/pip-19.2.3-py2.py3-none-any.whl b/lib-python/2.7/ensurepip/_bundled/pip-19.2.3-py2.py3-none-any.whl
new file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..8118df8ac1940f8c6cb410fbc18e5fae59872b95
GIT binary patch

[cut]
diff --git a/lib-python/2.7/ensurepip/_bundled/setuptools-41.2.0-py2.py3-none-any.whl b/lib-python/2.7/ensurepip/_bundled/setuptools-41.2.0-py2.py3-none-any.whl
new file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..82df6f63f4ee97380af0a29d8825ae775333b86d
GIT binary patch

[cut]

diff --git a/lib_pypy/_cffi_ssl/_stdssl/__init__.py b/lib_pypy/_cffi_ssl/_stdssl/__init__.py
--- a/lib_pypy/_cffi_ssl/_stdssl/__init__.py
+++ b/lib_pypy/_cffi_ssl/_stdssl/__init__.py
@@ -2,8 +2,18 @@
 import time
 import _thread
 import weakref
-from _pypy_openssl import ffi
-from _pypy_openssl import lib
+
+try:
+    from _pypy_openssl import ffi
+    from _pypy_openssl import lib
+except ImportError as e:
+    import os
+    msg = "\n\nThe _ssl cffi module either doesn't exist or is incompatible with your machine's shared libraries.\n" + \
+          "If you have a compiler installed, you can try to rebuild it by running:\n" + \
+          "cd %s\n" % os.path.abspath(os.path.dirname(os.path.dirname(__file__))) + \
+          "%s _ssl_build.py\n" % sys.executable
+    raise ImportError(str(e) + msg)
+
 from _cffi_ssl._stdssl.certificate import (_test_decode_cert,
     _decode_certificate, _certificate_to_der)
 from _cffi_ssl._stdssl.utility import (_str_with_len, _bytes_with_len,
diff --git a/pypy/config/pypyoption.py b/pypy/config/pypyoption.py
--- a/pypy/config/pypyoption.py
+++ b/pypy/config/pypyoption.py
@@ -41,7 +41,7 @@
     "_multibytecodec", "_continuation", "_cffi_backend",
     "_csv", "_pypyjson", "_posixsubprocess", "_cppyy", # "micronumpy",
     "_jitlog",
-    #" _ssl", "_hashlib", "crypt"
+    # "_hashlib", "crypt"
 ])
 
 import rpython.rlib.rvmprof.cintf
diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst
--- a/pypy/doc/whatsnew-head.rst
+++ b/pypy/doc/whatsnew-head.rst
@@ -9,3 +9,10 @@
 
 Fix segfault when calling descr-methods with no arguments
 
+.. branch: https-readme
+
+Convert http -> https in README.rst
+
+.. branch: license-update
+
+Update list of directories in LICENSE
diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py
--- a/pypy/interpreter/unicodehelper.py
+++ b/pypy/interpreter/unicodehelper.py
@@ -7,6 +7,7 @@
 from rpython.rlib.rarithmetic import r_uint, intmask
 from rpython.rtyper.lltypesystem import rffi
 from pypy.module.unicodedata.interp_ucd import unicodedb
+from rpython.rlib import runicode
 
 @specialize.memo()
 def decode_error_handler(space):
@@ -56,7 +57,6 @@
 
 def fsdecode(space, w_string):
     from pypy.module._codecs import interp_codecs
-    from rpython.rlib import runicode
     state = space.fromcache(interp_codecs.CodecState)
     errorhandler=state.decode_error_handler
     if _WIN32:
@@ -368,7 +368,6 @@
 
     def str_decode_mbcs(s, errors, final, errorhandler, force_ignore=True):
         slen = len(s)
-        from rpython.rlib import runicode
         res, size = runicode.str_decode_mbcs(s, slen, errors, final=final,
                            errorhandler=errorhandler, force_ignore=force_ignore)
         res_utf8 = runicode.unicode_encode_utf_8(res, size, 'strict')
@@ -389,139 +388,150 @@
     """
     if errors is None:
         errors = 'strict'
-    slen = len(s)
-    res = StringBuilder(slen)
+    size = len(s)
+    result = StringBuilder(size)
     pos = 0
-    end = len(s)
-    while pos < end:
+    while pos < size:
         ordch1 = ord(s[pos])
         # fast path for ASCII
+        # XXX maybe use a while loop here
         if ordch1 <= 0x7F:
             pos += 1
-            res.append(chr(ordch1))
+            result.append(chr(ordch1))
             continue
 
-        if ordch1 <= 0xC1:
-            r, pos, rettype = errorhandler(errors, "utf-8", "invalid start byte",
-                    s, pos, pos + 1)
-            res.append(r)
-            continue
+        n = ord(runicode._utf8_code_length[ordch1 - 0x80])
+        if pos + n > size:
+            if not final:
+                break
+            # argh, this obscure block of code is mostly a copy of
+            # what follows :-(
+            charsleft = size - pos - 1 # either 0, 1, 2
+            # note: when we get the 'unexpected end of data' we need
+            # to care about the pos returned; it can be lower than size,
+            # in case we need to continue running this loop
+            if not charsleft:
+                # there's only the start byte and nothing else
+                r, pos, rettype = errorhandler(errors, 'utf-8',
+                                      'unexpected end of data',
+                                      s, pos, pos+1)
+                result.append(r)
+                continue
+            ordch2 = ord(s[pos+1])
+            if n == 3:
+                # 3-bytes seq with only a continuation byte
+                if rutf8._invalid_byte_2_of_3(ordch1, ordch2, allow_surrogates):
+                    # second byte invalid, take the first and continue
+                    r, pos, rettype = errorhandler(errors, 'utf-8',
+                                          'invalid continuation byte',
+                                          s, pos, pos+1)
+                    result.append(r)
+                    continue
+                else:
+                    # second byte valid, but third byte missing
+                    r, pos, rettype = errorhandler(errors, 'utf-8',
+                                      'unexpected end of data',
+                                      s, pos, pos+2)
+                    result.append(r)
+                    continue
+            elif n == 4:
+                # 4-bytes seq with 1 or 2 continuation bytes
+                if rutf8._invalid_byte_2_of_4(ordch1, ordch2):
+                    # second byte invalid, take the first and continue
+                    r, pos, rettype = errorhandler(errors, 'utf-8',
+                                          'invalid continuation byte',
+                                          s, pos, pos+1)
+                    result.append(r)
+                    continue
+                elif charsleft == 2 and rutf8._invalid_byte_3_of_4(ord(s[pos+2])):
+                    # third byte invalid, take the first two and continue
+                    r, pos, rettype = errorhandler(errors, 'utf-8',
+                                          'invalid continuation byte',
+                                          s, pos, pos+2)
+                    result.append(r)
+                    continue
+                else:
+                    # there's only 1 or 2 valid cb, but the others are missing
+                    r, pos, rettype = errorhandler(errors, 'utf-8',
+                                      'unexpected end of data',
+                                      s, pos, pos+charsleft+1)
+                    result.append(r)
+                    continue
+            raise AssertionError("unreachable")
 
-        pos += 1
+        if n == 0:
+            r, pos, rettype = errorhandler(errors, 'utf-8',
+                                  'invalid start byte',
+                                  s, pos, pos+1)
+            result.append(r)
 
-        if ordch1 <= 0xDF:
-            if pos >= end:
-                if not final:
-                    pos -= 1
-                    break
-                r, pos, rettype = errorhandler(errors, "utf-8", "unexpected end of data",
-                    s, pos - 1, pos)
-                res.append(r)
-                continue
-            ordch2 = ord(s[pos])
+        elif n == 1:
+            assert 0, "ascii should have gone through the fast path"
 
+        elif n == 2:
+            ordch2 = ord(s[pos+1])
             if rutf8._invalid_byte_2_of_2(ordch2):
-                r, pos, rettype = errorhandler(errors, "utf-8", "invalid continuation byte",
-                    s, pos - 1, pos)
-                res.append(r)
+                r, pos, rettype = errorhandler(errors, 'utf-8',
+                                      'invalid continuation byte',
+                                      s, pos, pos+1)
+                result.append(r)
                 continue
             # 110yyyyy 10zzzzzz -> 00000000 00000yyy yyzzzzzz
-            pos += 1
-            res.append(chr(ordch1))
-            res.append(chr(ordch2))
-            continue
+            result.append(chr(ordch1))
+            result.append(chr(ordch2))
+            pos += 2
 
-        if ordch1 <= 0xEF:
-            if (pos + 2) > end:
-                if not final:
-                    pos -= 1
-                    break
-                if (pos) < end and  rutf8._invalid_byte_2_of_3(ordch1,
-                                                ord(s[pos]), allow_surrogates):
-                    msg = "invalid continuation byte"
-                    r, pos, rettype = errorhandler(errors, "utf-8", msg, s,
-                                                   pos - 1, pos)
-                else:
-                    msg = "unexpected end of data"
-                    r, pos, rettype = errorhandler(errors, "utf-8", msg, s,
-                                                   pos - 1, pos)
-                    pos = end
-                res.append(r)
-                continue
-            ordch2 = ord(s[pos])
-            ordch3 = ord(s[pos + 1])
-
+        elif n == 3:
+            ordch2 = ord(s[pos+1])
+            ordch3 = ord(s[pos+2])
             if rutf8._invalid_byte_2_of_3(ordch1, ordch2, allow_surrogates):
-                r, pos, rettype = errorhandler(errors, "utf-8", "invalid continuation byte",
-                    s, pos - 1, pos)
-                res.append(r)
+                r, pos, rettype = errorhandler(errors, 'utf-8',
+                                      'invalid continuation byte',
+                                      s, pos, pos+1)
+                result.append(r)
                 continue
             elif rutf8._invalid_byte_3_of_3(ordch3):
-                r, pos, rettype = errorhandler(errors, "utf-8", "invalid continuation byte",
-                    s, pos - 1, pos + 1)
-                res.append(r)
+                r, pos, rettype = errorhandler(errors, 'utf-8',
+                                      'invalid continuation byte',
+                                      s, pos, pos+2)
+                result.append(r)
                 continue
-            pos += 2
+            # 1110xxxx 10yyyyyy 10zzzzzz -> 00000000 xxxxyyyy yyzzzzzz
+            result.append(chr(ordch1))
+            result.append(chr(ordch2))
+            result.append(chr(ordch3))
+            pos += 3
 
-            # 1110xxxx 10yyyyyy 10zzzzzz -> 00000000 xxxxyyyy yyzzzzzz
-            res.append(chr(ordch1))
-            res.append(chr(ordch2))
-            res.append(chr(ordch3))
-            continue
-
-        if ordch1 <= 0xF4:
-            if (pos + 3) > end:
-                if not final:
-                    pos -= 1
-                    break
-                if pos < end and rutf8._invalid_byte_2_of_4(ordch1, ord(s[pos])):
-                    msg = "invalid continuation byte"
-                    r, pos, rettype = errorhandler(errors, "utf-8", msg, s,
-                                                   pos - 1, pos)
-                elif pos + 1 < end and rutf8._invalid_byte_3_of_4(ord(s[pos + 1])):
-                    msg = "invalid continuation byte"
-                    pos += 1
-                    r, pos, rettype = errorhandler(errors, "utf-8", msg, s,
-                                                   pos - 2, pos)
-                else:
-                    msg = "unexpected end of data"
-                    r, pos, rettype = errorhandler(errors, "utf-8", msg, s,
-                                                   pos - 1, pos)
-                    pos = end
-                res.append(r)
-                continue
-            ordch2 = ord(s[pos])
-            ordch3 = ord(s[pos + 1])
-            ordch4 = ord(s[pos + 2])
+        elif n == 4:
+            ordch2 = ord(s[pos+1])
+            ordch3 = ord(s[pos+2])
+            ordch4 = ord(s[pos+3])
             if rutf8._invalid_byte_2_of_4(ordch1, ordch2):
-                r, pos, rettype = errorhandler(errors, "utf-8", "invalid continuation byte",
-                    s, pos - 1, pos)
-                res.append(r)
+                r, pos, rettype = errorhandler(errors, 'utf-8',
+                                      'invalid continuation byte',
+                                      s, pos, pos+1)
+                result.append(r)
                 continue
             elif rutf8._invalid_byte_3_of_4(ordch3):
-                r, pos, rettype = errorhandler(errors, "utf-8", "invalid continuation byte",
-                    s, pos - 1, pos + 1)
-                res.append(r)
+                r, pos, rettype = errorhandler(errors, 'utf-8',
+                                      'invalid continuation byte',
+                                      s, pos, pos+2)
+                result.append(r)
                 continue
             elif rutf8._invalid_byte_4_of_4(ordch4):
-                r, pos, rettype = errorhandler(errors, "utf-8", "invalid continuation byte",
-                    s, pos - 1, pos + 2)
-                res.append(r)
+                r, pos, rettype = errorhandler(errors, 'utf-8',
+                                      'invalid continuation byte',
+                                      s, pos, pos+3)
+                result.append(r)
                 continue
+            # 11110www 10xxxxxx 10yyyyyy 10zzzzzz -> 000wwwxx xxxxyyyy yyzzzzzz
+            result.append(chr(ordch1))
+            result.append(chr(ordch2))
+            result.append(chr(ordch3))
+            result.append(chr(ordch4))
+            pos += 4
 
-            pos += 3
-            # 11110www 10xxxxxx 10yyyyyy 10zzzzzz -> 000wwwxx xxxxyyyy yyzzzzzz
-            res.append(chr(ordch1))
-            res.append(chr(ordch2))
-            res.append(chr(ordch3))
-            res.append(chr(ordch4))
-            continue
-
-        r, pos, rettype = errorhandler(errors, "utf-8", "invalid start byte",
-                s, pos - 1, pos)
-        res.append(r)
-
-    r = res.build()
+    r = result.build()
     return r, rutf8.check_utf8(r, True), pos
 
 hexdigits = "0123456789ABCDEFabcdef"
diff --git a/pypy/module/_codecs/test/test_codecs.py b/pypy/module/_codecs/test/test_codecs.py
--- a/pypy/module/_codecs/test/test_codecs.py
+++ b/pypy/module/_codecs/test/test_codecs.py
@@ -1447,3 +1447,17 @@
         assert res == 52
         raises(TypeError, u"abc".encode, "test.mynontextenc")
         raises(TypeError, b"abc".decode, "test.mynontextenc")
+
+    def test_last_byte_handler(self):
+        # issue bb-2389
+        import _codecs
+        _codecs.register_error('custom_replace', lambda exc: (u'\ufffd', exc.start+1))
+        for s, res in ((b"WORD\xe3\xab",
+                            (u'WORD\ufffd\ufffd', u'WORD\ufffd')),
+                       (b"\xef\xbb\xbfWORD\xe3\xabWORD2",
+                            (u'\ufeffWORD\ufffd\ufffdWORD2',
+                             u'\ufeffWORD\ufffdWORD2'))):
+            r = s.decode('utf8', 'replace')
+            assert r == res[1]
+            r = s.decode('utf8', 'custom_replace')
+            assert r == res[0]
diff --git a/pypy/module/sys/vm.py b/pypy/module/sys/vm.py
--- a/pypy/module/sys/vm.py
+++ b/pypy/module/sys/vm.py
@@ -66,6 +66,12 @@
     from rpython.rlib.rgc import increase_root_stack_depth
     if new_limit <= 0:
         raise oefmt(space.w_ValueError, "recursion limit must be positive")
+    # Some programs use very large values to mean "don't check, I want to
+    # use as much as possible and then segfault".  Add a silent upper bound
+    # of 10**6 here, because huge values cause huge shadowstacks to be
+    # allocated (or MemoryErrors).
+    if new_limit > 1000000:
+        new_limit = 1000000
     try:
         _stack_set_length_fraction(new_limit * 0.001)
         _stack_check_noinline()
diff --git a/pypy/objspace/std/test/test_newformat.py b/pypy/objspace/std/test/test_newformat.py
--- a/pypy/objspace/std/test/test_newformat.py
+++ b/pypy/objspace/std/test/test_newformat.py
@@ -210,6 +210,13 @@
         fmtstr = self.s("{:[XYZ}")
         assert fmtstr.format(Foo()) == "<<%r>>" % (self.s("[XYZ"),)
 
+    def test_issue3100(self):
+        class Foo:
+            def __format__(self, f):
+                return '<<%r>>' % (f,)
+        fmtstr = self.s("{:[XYZ}")
+        assert fmtstr.format(Foo()) == "<<%r>>" % (self.s("[XYZ"),)
+
 
 class AppTestUnicodeFormat(BaseStringFormatTests):
     def setup_class(cls):
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to