Author: Armin Rigo <[email protected]>
Branch: unicode-utf8
Changeset: r92241:41e0c8d46641
Date: 2017-08-24 11:10 +0200
http://bitbucket.org/pypy/pypy/changeset/41e0c8d46641/

Log:    (fijal, arigo)

        Fix

diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py
--- a/pypy/interpreter/unicodehelper.py
+++ b/pypy/interpreter/unicodehelper.py
@@ -44,7 +44,8 @@
         string, len(string), "strict",
         final=True, errorhandler=DecodeWrapper(decode_error_handler(space)).handle,
         unicodedata_handler=unicodedata_handler)
-    return result_u.encode('utf8'), len(result_u)
+    # XXX argh.  we want each surrogate to be encoded separately
+    return ''.join([u.encode('utf8') for u in result_u]), len(result_u)
 
 def decode_raw_unicode_escape(space, string):
     # XXX pick better length, maybe
@@ -52,7 +53,8 @@
     result_u, consumed = runicode.str_decode_raw_unicode_escape(
         string, len(string), "strict",
         final=True, errorhandler=DecodeWrapper(decode_error_handler(space)).handle)
-    return result_u.encode('utf8'), len(result_u)
+    # XXX argh.  we want each surrogate to be encoded separately
+    return ''.join([u.encode('utf8') for u in result_u]), len(result_u)
 
 def check_utf8(space, string):
     # Surrogates are accepted and not treated specially at all.
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to