Author: Matti Picus <matti.pi...@gmail.com>
Branch: unicode-utf8-py3
Changeset: r94763:52d2576ff698
Date: 2018-06-13 22:42 -0700
http://bitbucket.org/pypy/pypy/changeset/52d2576ff698/

Log: try and fail to make progress with test_unicodehelper

diff --git a/pypy/interpreter/test/test_unicodehelper.py b/pypy/interpreter/test/test_unicodehelper.py
--- a/pypy/interpreter/test/test_unicodehelper.py
+++ b/pypy/interpreter/test/test_unicodehelper.py
@@ -6,14 +6,19 @@
 from pypy.interpreter.unicodehelper import (
     encode_utf8, str_decode_utf8, utf8_encode_utf_32_be, str_decode_utf_32_be)
 from pypy.interpreter.unicodehelper import encode_utf8sp, decode_utf8sp
-
+from pypy.interpreter.unicodehelper import utf8_encode_ascii, str_decode_ascii
+from pypy.interpreter import unicodehelper as uh
+from pypy.module._codecs.interp_codecs import CodecState

 class Hit(Exception):
     pass

-from pypy.interpreter.unicodehelper import utf8_encode_ascii, str_decode_ascii
-from pypy.interpreter import unicodehelper as uh
-from pypy.module._codecs.interp_codecs import CodecState
+class FakeSpace:
+    def __getattr__(self, name):
+        if name in ('w_UnicodeEncodeError', 'w_UnicodeDecodeError'):
+            raise Hit
+        raise AttributeError(name)
+

 def decode_utf8(u):
     return str_decode_utf8(u, "strict", True, None)
@@ -82,18 +87,23 @@

 @pytest.mark.parametrize('unich', [u"\ud800", u"\udc80"])
 def test_utf32_surrogates(unich):
-    assert (unicode_encode_utf_32_be(unich, 1, None) ==
+    assert (utf8_encode_utf_32_be(unich.encode('utf-8'), None) ==
             struct.pack('>i', ord(unich)))
     with pytest.raises(UnicodeEncodeError):
-        unicode_encode_utf_32_be(unich, 1, None, allow_surrogates=False)
+        def errorhandler(errors, enc, msg, b, startingpos, endingpos):
+            u = b.decode('utf-8')
+            raise UnicodeEncodeError(enc, u, startingpos, endingpos, msg)
+        utf8_encode_utf_32_be(unich.encode('utf-8'), None, errorhandler,
+                              allow_surrogates=False)

     def replace_with(ru, rs):
         def errorhandler(errors, enc, msg, u, startingpos, endingpos):
             if errors == 'strict':
                 raise UnicodeEncodeError(enc, u, startingpos, endingpos, msg)
-            return ru, rs, endingpos
-        return unicode_encode_utf_32_be(
-            u"<%s>" % unich, 3, None,
+            return ru.encode('utf-8'), endingpos
+        uch = u"<%s>" % unich
+        return utf8_encode_utf_32_be(
+            uch.encode('utf8'), None,
             errorhandler, allow_surrogates=False)
     assert replace_with(u'rep', None) == u'<rep>'.encode('utf-32-be')
     assert (replace_with(None, '\xca\xfe\xca\xfe') ==
diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py
--- a/pypy/interpreter/unicodehelper.py
+++ b/pypy/interpreter/unicodehelper.py
@@ -93,12 +93,12 @@
     from pypy.module._codecs import interp_codecs
     state = space.fromcache(interp_codecs.CodecState)
     if _WIN32:
-        uni = space.unicode_w(w_uni)
+        uni = space.utf8_w(w_uni)
         bytes = unicode_encode_mbcs(uni, len(uni), 'strict',
                                     errorhandler=encode_error_handler(space),
                                     force_replace=False)
     elif _MACOSX:
-        uni = space.unicode_w(w_uni)
+        uni = space.utf8_w(w_uni)
         bytes = runicode.unicode_encode_utf_8_impl(
             uni, len(uni), 'surrogateescape',
             errorhandler=state.encode_error_handler,
@@ -110,8 +110,8 @@
         # instead
         from pypy.module._codecs.locale import (
             unicode_encode_locale_surrogateescape)
-        uni = space.unicode_w(w_uni)
-        if u'\x00' in uni:
+        uni = space.utf8_w(w_uni)
+        if b'\x00' in uni:
             raise oefmt(space.w_ValueError, "embedded null character")
         bytes = unicode_encode_locale_surrogateescape(
             uni, errorhandler=encode_error_handler(space))
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit