Author: Armin Rigo <[email protected]>
Branch: py3.5
Changeset: r91665:b86de9385d47
Date: 2017-07-02 16:12 +0200
http://bitbucket.org/pypy/pypy/changeset/b86de9385d47/
Log: Issue #2598
Try to interpret a byte string for '%s' like a utf-8 string. But
don't crash if it is not valid UTF-8; instead use the "replace"
error handler.
diff --git a/pypy/interpreter/error.py b/pypy/interpreter/error.py
--- a/pypy/interpreter/error.py
+++ b/pypy/interpreter/error.py
@@ -10,6 +10,7 @@
from rpython.rlib.objectmodel import dont_inline
from rpython.rlib import rstack, rstackovf
from rpython.rlib import rwin32
+from rpython.rlib import runicode
from pypy.interpreter import debug
@@ -468,6 +469,14 @@
assert len(formats) > 0, "unsupported: no % command found"
return tuple(parts), tuple(formats)
+def _decode_utf8(string):
+ # used when building the error message: decode with the "replace"
+ # error handler so that a byte string that is not valid UTF-8 cannot crash
+ assert isinstance(string, str)
+ result, consumed = runicode.str_decode_utf_8(
+ string, len(string), "replace", final=True)
+ return result
+
def get_operrcls2(valuefmt):
valuefmt = valuefmt.decode('ascii')
strings, formats = decompose_valuefmt(valuefmt)
@@ -499,13 +508,16 @@
elif fmt == 'S':
result = space.unicode_w(space.str(value))
elif fmt == 'T':
- result = space.type(value).name.decode('utf-8')
+ result = _decode_utf8(space.type(value).name)
elif fmt == 'N':
result = value.getname(space)
elif fmt == '8':
- result = value.decode('utf-8')
+ result = _decode_utf8(value)
else:
- result = unicode(value)
+ if isinstance(value, unicode):
+ result = value
+ else:
+ result = _decode_utf8(str(value))
lst[i + i + 1] = result
lst[-1] = self.xstrings[-1]
return u''.join(lst)
diff --git a/pypy/interpreter/test/test_error.py b/pypy/interpreter/test/test_error.py
--- a/pypy/interpreter/test/test_error.py
+++ b/pypy/interpreter/test/test_error.py
@@ -92,6 +92,20 @@
operr = oefmt("w_type", "abc %8", arg)
val = operr._compute_value(space)
assert val == u"abc àèìòù"
+ #
+ # if the arg is a byte string and we specify '%s', then it is
+ # also decoded as utf-8. This should be the common case
+ # nowadays, with utf-8 byte strings being common in the RPython
+ # sources of PyPy.
+ operr = oefmt("w_type", "abc %s", arg)
+ val = operr._compute_value(space)
+ assert val == u"abc àèìòù"
+ #
+ # if the byte string is not valid utf-8, then don't crash
+ arg = '\xe9'
+ operr = oefmt("w_type", "abc %8", arg)
+ val = operr._compute_value(space)
+
def test_errorstr(space):
operr = OperationError(space.w_ValueError, space.wrap("message"))
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit