Author: Ronan Lamy <[email protected]>
Branch: py3.6
Changeset: r98388:f574e8884bed
Date: 2019-12-24 17:30 +0100
http://bitbucket.org/pypy/pypy/changeset/f574e8884bed/
Log: Don't swallow the UnicodeDecodeError in one corner case (fixes issue
#3132)
diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py
--- a/pypy/interpreter/unicodehelper.py
+++ b/pypy/interpreter/unicodehelper.py
@@ -1209,8 +1209,6 @@
r, pos, rettype = errorhandler(errors, public_encoding_name,
errmsg, s, pos, len(s))
result.append(r)
- if len(s) - pos < 2:
- break
elif 0xD800 <= ch <= 0xDBFF:
ch2 = (ord(s[pos+ihi]) << 8) | ord(s[pos+ilo])
pos += 2
diff --git a/pypy/objspace/std/stringmethods.py b/pypy/objspace/std/stringmethods.py
--- a/pypy/objspace/std/stringmethods.py
+++ b/pypy/objspace/std/stringmethods.py
@@ -482,7 +482,7 @@
def _lower_in_str(self, value, i):
# overridden in unicodeobject.py
return self._lower(value[i])
-
+
# This is not used for W_UnicodeObject.
def descr_partition(self, space, w_sub):
from pypy.objspace.std.bytearrayobject import W_BytearrayObject
diff --git a/pypy/objspace/std/test/test_bytesobject.py b/pypy/objspace/std/test/test_bytesobject.py
--- a/pypy/objspace/std/test/test_bytesobject.py
+++ b/pypy/objspace/std/test/test_bytesobject.py
@@ -803,6 +803,10 @@
assert b'hello'.decode('ascii') == 'hello'
raises(UnicodeDecodeError, b'he\x97lo'.decode, 'ascii')
+ def test_decode_surrogatepass_issue_3132(self):
+ with raises(UnicodeDecodeError):
+ b"\xd8=a".decode("utf-16-be", "surrogatepass")
+
def test_encode(self):
assert 'hello'.encode() == b'hello'
assert type('hello'.encode()) is bytes
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit