Author: Matti Picus <[email protected]>
Branch: unicode-utf8-py3
Changeset: r95306:5e342e9eee98
Date: 2018-11-12 19:23 -0800
http://bitbucket.org/pypy/pypy/changeset/5e342e9eee98/

Log:    use utf8 without converting to unicode

diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -460,14 +460,14 @@
 def surrogateescape_errors(space, w_exc):
     check_exception(space, w_exc)
     if space.isinstance_w(w_exc, space.w_UnicodeEncodeError):
-        obj = space.realunicode_w(space.getattr(w_exc, space.newtext('object')))
+        utf8 = space.utf8_w(space.getattr(w_exc, space.newtext('object')))
         start = space.int_w(space.getattr(w_exc, space.newtext('start')))
         w_end = space.getattr(w_exc, space.newtext('end'))
         end = space.int_w(w_end)
         res = ''
         pos = start
         while pos < end:
-            ch = ord(obj[pos])
+            ch = rutf8.codepoint_at_pos(utf8, pos)
             pos += 1
             if ch < 0xdc80 or ch > 0xdcff:
                 # Not a UTF-8b surrogate, fail with original exception
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to