Author: Matti Picus <[email protected]>
Branch: unicode-utf8-py3
Changeset: r95306:5e342e9eee98
Date: 2018-11-12 19:23 -0800
http://bitbucket.org/pypy/pypy/changeset/5e342e9eee98/
Log: use utf8 without converting to unicode
diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -460,14 +460,14 @@
def surrogateescape_errors(space, w_exc):
check_exception(space, w_exc)
if space.isinstance_w(w_exc, space.w_UnicodeEncodeError):
- obj = space.realunicode_w(space.getattr(w_exc, space.newtext('object')))
+ utf8 = space.utf8_w(space.getattr(w_exc, space.newtext('object')))
start = space.int_w(space.getattr(w_exc, space.newtext('start')))
w_end = space.getattr(w_exc, space.newtext('end'))
end = space.int_w(w_end)
res = ''
pos = start
while pos < end:
- ch = ord(obj[pos])
+ ch = rutf8.codepoint_at_pos(utf8, pos)
pos += 1
if ch < 0xdc80 or ch > 0xdcff:
# Not a UTF-8b surrogate, fail with original exception
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit