Author: Matti Picus <matti.pi...@gmail.com>
Branch: unicode-utf8-py3
Changeset: r95131:cdcddb46fce5
Date: 2018-09-16 21:26 +0300
http://bitbucket.org/pypy/pypy/changeset/cdcddb46fce5/
Log: store utf8 and fix off-by-one

diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py
--- a/pypy/interpreter/unicodehelper.py
+++ b/pypy/interpreter/unicodehelper.py
@@ -1206,14 +1206,15 @@
             else:
                 res_8, newindex = errorhandler(
                     errors, public_encoding_name, 'surrogates not allowed',
-                    s, pos - 1, pos)
-                for cp in rutf8.Utf8StringIterator(res_8):
-                    if cp < 0xD800:
+                    s, pos, pos+1)
+                #for cp in rutf8.Utf8StringIterator(res_8):
+                for cp in res_8:
+                    if cp < 0xD800 or allow_surrogates:
                         _STORECHAR(result, cp, byteorder)
                     else:
                         errorhandler('strict', public_encoding_name,
                                      'surrogates not allowed',
-                                     s, pos-1, pos)
+                                     s, pos, pos+1)
                 if index != newindex:  # Should be uncommon
                     index = newindex
                     pos = rutf8._pos_at_index(s, newindex)
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit