Author: Carl Friedrich Bolz-Tereick <[email protected]>
Branch: py3.6
Changeset: r96164:008d52d161c2
Date: 2019-02-25 16:33 +0100
http://bitbucket.org/pypy/pypy/changeset/008d52d161c2/
Log: python3.6 (not 2.7) uses 'utf-8' in UnicodeDecodeError messages
diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py
--- a/pypy/interpreter/unicodehelper.py
+++ b/pypy/interpreter/unicodehelper.py
@@ -375,7 +375,7 @@
continue
if ordch1 <= 0xC1:
- r, pos, rettype = errorhandler(errors, "utf8", "invalid start byte",
+ r, pos, rettype = errorhandler(errors, "utf-8", "invalid start byte",
s, pos, pos + 1)
res.append(r)
continue
@@ -387,14 +387,14 @@
if not final:
pos -= 1
break
- r, pos, rettype = errorhandler(errors, "utf8", "unexpected end of data",
+ r, pos, rettype = errorhandler(errors, "utf-8", "unexpected end of data",
s, pos - 1, pos)
res.append(r)
continue
ordch2 = ord(s[pos])
if rutf8._invalid_byte_2_of_2(ordch2):
- r, pos, rettype = errorhandler(errors, "utf8", "invalid continuation byte",
+ r, pos, rettype = errorhandler(errors, "utf-8", "invalid continuation byte",
s, pos - 1, pos)
res.append(r)
continue
@@ -412,11 +412,11 @@
if (pos) < end and rutf8._invalid_byte_2_of_3(ordch1,
ord(s[pos]), allow_surrogates):
msg = "invalid continuation byte"
- r, pos, rettype = errorhandler(errors, "utf8", msg, s,
+ r, pos, rettype = errorhandler(errors, "utf-8", msg, s,
pos - 1, pos)
else:
msg = "unexpected end of data"
- r, pos, rettype = errorhandler(errors, "utf8", msg, s,
+ r, pos, rettype = errorhandler(errors, "utf-8", msg, s,
pos - 1, pos)
pos = end
res.append(r)
@@ -425,12 +425,12 @@
ordch3 = ord(s[pos + 1])
if rutf8._invalid_byte_2_of_3(ordch1, ordch2, allow_surrogates):
- r, pos, rettype = errorhandler(errors, "utf8", "invalid continuation byte",
+ r, pos, rettype = errorhandler(errors, "utf-8", "invalid continuation byte",
s, pos - 1, pos)
res.append(r)
continue
elif rutf8._invalid_byte_3_of_3(ordch3):
- r, pos, rettype = errorhandler(errors, "utf8", "invalid continuation byte",
+ r, pos, rettype = errorhandler(errors, "utf-8", "invalid continuation byte",
s, pos - 1, pos + 1)
res.append(r)
continue
@@ -449,16 +449,16 @@
break
if pos < end and rutf8._invalid_byte_2_of_4(ordch1,
ord(s[pos])):
msg = "invalid continuation byte"
- r, pos, rettype = errorhandler(errors, "utf8", msg, s,
+ r, pos, rettype = errorhandler(errors, "utf-8", msg, s,
pos - 1, pos)
elif pos + 1 < end and rutf8._invalid_byte_3_of_4(ord(s[pos +
1])):
msg = "invalid continuation byte"
pos += 1
- r, pos, rettype = errorhandler(errors, "utf8", msg, s,
+ r, pos, rettype = errorhandler(errors, "utf-8", msg, s,
pos - 2, pos)
else:
msg = "unexpected end of data"
- r, pos, rettype = errorhandler(errors, "utf8", msg, s,
+ r, pos, rettype = errorhandler(errors, "utf-8", msg, s,
pos - 1, pos)
pos = end
res.append(r)
@@ -467,17 +467,17 @@
ordch3 = ord(s[pos + 1])
ordch4 = ord(s[pos + 2])
if rutf8._invalid_byte_2_of_4(ordch1, ordch2):
- r, pos, rettype = errorhandler(errors, "utf8", "invalid continuation byte",
+ r, pos, rettype = errorhandler(errors, "utf-8", "invalid continuation byte",
s, pos - 1, pos)
res.append(r)
continue
elif rutf8._invalid_byte_3_of_4(ordch3):
- r, pos, rettype = errorhandler(errors, "utf8", "invalid continuation byte",
+ r, pos, rettype = errorhandler(errors, "utf-8", "invalid continuation byte",
s, pos - 1, pos + 1)
res.append(r)
continue
elif rutf8._invalid_byte_4_of_4(ordch4):
- r, pos, rettype = errorhandler(errors, "utf8", "invalid continuation byte",
+ r, pos, rettype = errorhandler(errors, "utf-8", "invalid continuation byte",
s, pos - 1, pos + 2)
res.append(r)
continue
@@ -490,7 +490,7 @@
res.append(chr(ordch4))
continue
- r, pos, rettype = errorhandler(errors, "utf8", "invalid start byte",
+ r, pos, rettype = errorhandler(errors, "utf-8", "invalid start byte",
s, pos - 1, pos)
res.append(r)
diff --git a/pypy/module/_codecs/test/test_codecs.py b/pypy/module/_codecs/test/test_codecs.py
--- a/pypy/module/_codecs/test/test_codecs.py
+++ b/pypy/module/_codecs/test/test_codecs.py
@@ -59,6 +59,10 @@
assert str(UnicodeDecodeError(
"ascii", b"g\xfcrk", 1, 3, "ouch")) == "'ascii' codec can't decode bytes in position 1-2: ouch"
+ def test_unicodedecodeerror_utf8(self):
+ error = raises(UnicodeDecodeError, b'\xf6'.decode, "utf-8").value
+ assert str(error) == "'utf-8' codec can't decode byte 0xf6 in position 0: invalid start byte"
+
def test_unicodetranslateerror(self):
import sys
assert str(UnicodeTranslateError(
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit