Author: fijal
Branch: unicode-utf8
Changeset: r93289:a6a28d7e46a8
Date: 2017-12-06 21:17 +0200
http://bitbucket.org/pypy/pypy/changeset/a6a28d7e46a8/
Log: try to improve latin1 handling diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -149,37 +149,32 @@ def _utf8_encode_latin_1_slowpath(s, errors, errorhandler): res = StringBuilder(len(s)) - size = len(s) cur = 0 - i = 0 - while i < size: - if ord(s[i]) <= 0x7F: - res.append(s[i]) - i += 1 - cur += 1 - else: - oc = rutf8.codepoint_at_pos(s, i) - if oc <= 0xFF: - res.append(chr(oc)) + iter = rutf8.Utf8StringIterator(s) + try: + while True: + ch = iter.next() + if ch <= 0xFF: + res.append(chr(ch)) cur += 1 - i = rutf8.next_codepoint_pos(s, i) else: r, pos = errorhandler(errors, 'latin1', 'ordinal not in range(256)', s, cur, cur + 1) - for j in range(pos - cur): - i = rutf8.next_codepoint_pos(s, i) - j = 0 - while j < len(r): - c = rutf8.codepoint_at_pos(r, j) + for c in rutf8.Utf8StringIterator(r): if c > 0xFF: errorhandler("strict", 'latin1', 'ordinal not in range(256)', s, cur, cur + 1) - j = rutf8.next_codepoint_pos(r, j) res.append(chr(c)) + + for j in range(pos - cur - 1): + iter.next() + cur = pos + except StopIteration: + pass r = res.build() return r _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit