Author: Matti Picus <matti.pi...@gmail.com> Branch: unicode-utf8-py3 Changeset: r95768:be2b370c0652 Date: 2019-02-01 15:49 +0200 http://bitbucket.org/pypy/pypy/changeset/be2b370c0652/
Log: remove round-tripping str-unicode from errors diff --git a/pypy/interpreter/error.py b/pypy/interpreter/error.py --- a/pypy/interpreter/error.py +++ b/pypy/interpreter/error.py @@ -472,19 +472,7 @@ assert len(formats) > 0, "unsupported: no % command found" return tuple(parts), tuple(formats) -def _decode_utf8(string): - # when building the error message, don't crash if the byte string - # provided is not valid UTF-8 - if isinstance(string, unicode): - return string - assert isinstance(string, str) - #return string.decode('utf8') - result, consumed = runicode.str_decode_utf_8( - string, len(string), "replace", final=True) - return result - def get_operrcls2(valuefmt): - valuefmt = valuefmt.decode('ascii') strings, formats = decompose_valuefmt(valuefmt) assert len(strings) == len(formats) + 1 try: @@ -503,49 +491,49 @@ self.setup(w_type) def _compute_value(self, space): - # TODO: avoid utf8->unicode->utf8 dance lst = [None] * (len(formats) + len(formats) + 1) + lgt = 0 for i, fmt, attr in entries: lst[i + i] = self.xstrings[i] + lgt += len(self.xstrings[i]) value = getattr(self, attr) if fmt == 'd': - result = str(value).decode('ascii') + result = str(value) + lgt += len(result) elif fmt == 'R': - result = space.realunicode_w(space.repr(value)) + result = space.utf8_w(space.repr(value)) + lgt += len(result) elif fmt == 'S': - result = space.realunicode_w(space.str(value)) + result = space.utf8_w(space.str(value)) + lgt += len(result) elif fmt == 'T': - result = _decode_utf8(space.type(value).name) + result = space.type(value).name + lgt += len(result) elif fmt == 'N': - name = value.getname(space) - if isinstance(name, unicode): - result = name - else: - result = _decode_utf8(name) + result = value.getname(space) + lgt += len(result) elif fmt == '8': # u'str\uxxxx' -> 'str\xXX\xXX' -> u"'str\xXX\xXX'" - if isinstance(value, unicode): - result = runicode.unicode_encode_utf_8(value, - len(value), 'strict', allow_surrogates=True) - else: - from pypy.interpreter import unicodehelper - result = _decode_utf8(unicodehelper.str_decode_utf8( - value, 'replace', True, - unicodehelper.decode_never_raise, True)[0]) + from pypy.interpreter import unicodehelper + result, _lgt, pos = unicodehelper.str_decode_utf8( + value, 'replace', True, + unicodehelper.decode_never_raise, True) + lgt += _lgt + elif isinstance(value, unicode): + # 's' + result = str(value.encode('utf-8')) + lgt += len(value) else: - if isinstance(value, unicode): - result = value - else: - result = _decode_utf8(str(value)) + from rpython.rlib import rutf8 + result = str(value) + # Assumes valid utf-8 + lgt += rutf8.check_utf8(result, True) lst[i + i + 1] = result lst[-1] = self.xstrings[-1] - retval = u''.join(lst) - # We need to annotate both allow_surrogates=True,False - # since this function is used to replace uni.encode('utf8') - # deep in rpython - return runicode.unicode_encode_utf_8(retval, len(retval), - 'strict', allow_surrogates=False), len(retval) - # + lgt += len(self.xstrings[-1]) + retval = ''.join(lst) + return retval, lgt + _fmtcache2[formats] = OpErrFmt return OpErrFmt, strings _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit