Author: Armin Rigo <[email protected]>
Branch: unicode-utf8
Changeset: r92241:41e0c8d46641
Date: 2017-08-24 11:10 +0200
http://bitbucket.org/pypy/pypy/changeset/41e0c8d46641/
Log: (fijal, arigo)
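Fix the unicode-escape decoders: UTF-8-encode each element of the decoded result on its own, so that every surrogate is encoded separately.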
diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py
--- a/pypy/interpreter/unicodehelper.py
+++ b/pypy/interpreter/unicodehelper.py
@@ -44,7 +44,8 @@
string, len(string), "strict",
final=True,
errorhandler=DecodeWrapper(decode_error_handler(space)).handle,
unicodedata_handler=unicodedata_handler)
- return result_u.encode('utf8'), len(result_u)
+ # XXX argh. we want each surrogate to be encoded separately
+ return ''.join([u.encode('utf8') for u in result_u]), len(result_u)
def decode_raw_unicode_escape(space, string):
# XXX pick better length, maybe
@@ -52,7 +53,8 @@
result_u, consumed = runicode.str_decode_raw_unicode_escape(
string, len(string), "strict",
final=True,
errorhandler=DecodeWrapper(decode_error_handler(space)).handle)
- return result_u.encode('utf8'), len(result_u)
+ # XXX argh. we want each surrogate to be encoded separately
+ return ''.join([u.encode('utf8') for u in result_u]), len(result_u)
def check_utf8(space, string):
# Surrogates are accepted and not treated specially at all.
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit