Author: fijal
Branch: unicode-utf8
Changeset: r93305:a50930e1db6b
Date: 2017-12-07 18:07 +0200
http://bitbucket.org/pypy/pypy/changeset/a50930e1db6b/
Log: whack the slowpath too

diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -460,10 +460,12 @@
 
 # utf-8 functions are not regular, because we have to pass
 # "allow_surrogates=True"
-@unwrap_spec(utf8='utf8', errors='text_or_none')
-def utf_8_encode(space, utf8, errors="strict"):
-    length, _ = rutf8.check_utf8(utf8, allow_surrogates=True)
-    return space.newtuple([space.newbytes(utf8), space.newint(length)])
+@unwrap_spec(errors='text_or_none')
+def utf_8_encode(space, w_obj, errors="strict"):
+    utf8, lgt = space.utf8_len_w(w_obj)
+    if rutf8.has_surrogates(utf8):
+        utf8 = rutf8.reencode_utf8_with_surrogates(utf8)
+    return space.newtuple([space.newbytes(utf8), space.newint(lgt)])
 #@unwrap_spec(uni=unicode, errors='text_or_none')
 #def utf_8_encode(space, uni, errors="strict"):
 #    if errors is None:
diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py
--- a/pypy/objspace/std/test/test_unicodeobject.py
+++ b/pypy/objspace/std/test/test_unicodeobject.py
@@ -741,6 +741,8 @@
         assert u'\u20ac'.encode('utf-8') == '\xe2\x82\xac'
         assert u'\ud800\udc02'.encode('utf-8') == '\xf0\x90\x80\x82'
         assert u'\ud84d\udc56'.encode('utf-8') == '\xf0\xa3\x91\x96'
+        assert u'\ud800\udc02'.encode('uTf-8') == '\xf0\x90\x80\x82'
+        assert u'\ud84d\udc56'.encode('Utf8') == '\xf0\xa3\x91\x96'
         assert u'\ud800'.encode('utf-8') == '\xed\xa0\x80'
         assert u'\udc00'.encode('utf-8') == '\xed\xb0\x80'
         assert (u'\ud800\udc02'*1000).encode('utf-8') == '\xf0\x90\x80\x82'*1000
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit