Author: Matti Picus <matti.pi...@gmail.com>
Branch: unicode-utf8-py3
Changeset: r95569:0b7bb06a4e4f
Date: 2019-01-02 16:08 +0200
http://bitbucket.org/pypy/pypy/changeset/0b7bb06a4e4f/
Log: fix improper changes from merge diff --git a/pypy/module/_codecs/test/test_codecs.py b/pypy/module/_codecs/test/test_codecs.py --- a/pypy/module/_codecs/test/test_codecs.py +++ b/pypy/module/_codecs/test/test_codecs.py @@ -723,7 +723,7 @@ assert b'\x00'.decode('unicode-internal', 'ignore') == '' def test_backslashreplace(self): - import sys + import sys, codecs sin = u"a\xac\u1234\u20ac\u8000\U0010ffff" if sys.maxunicode > 65535: expected_ascii = b"a\\xac\\u1234\\u20ac\\u8000\\U0010ffff" @@ -888,7 +888,7 @@ codecs.register_error("test.hui", handler_unicodeinternal) res = b"\x00\x00\x00\x00\x00".decode("unicode-internal", "test.hui") if sys.maxunicode > 65535: - assert res == u"\u0000\u0001" # UCS4 build + assert res == u"\u0000\u0001\u0000" # UCS4 build else: assert res == u"\x00\x00\x01" # UCS2 build @@ -945,7 +945,7 @@ def test_encode_error_bad_handler(self): import codecs codecs.register_error("test.bad_handler", lambda e: (repl, 1)) - assert u"xyz".encode("latin-1", "test.bad_handler") == "xyz" + assert u"xyz".encode("latin-1", "test.bad_handler") == b"xyz" repl = u"\u1234" raises(UnicodeEncodeError, u"\u5678".encode, "latin-1", "test.bad_handler") diff --git a/pypy/module/array/interp_array.py b/pypy/module/array/interp_array.py --- a/pypy/module/array/interp_array.py +++ b/pypy/module/array/interp_array.py @@ -1151,25 +1151,11 @@ elif mytype.typecode == 'c': return space.newbytes(item) elif mytype.typecode == 'u': - code = r_uint(ord(item)) - # cpython will allow values > sys.maxunicode - # while silently truncating the top bits - if code <= r_uint(0x7F): - # Encode ASCII - item = chr(code) - elif code <= r_uint(0x07FF): - item = (chr((0xc0 | (code >> 6))) + - chr((0x80 | (code & 0x3f)))) - elif code <= r_uint(0xFFFF): - item = (chr((0xe0 | (code >> 12))) + - chr((0x80 | ((code >> 6) & 0x3f))) + - chr((0x80 | (code & 0x3f)))) - else: - item = (chr((0xf0 | (code >> 18)) & 0xff) + - chr((0x80 | ((code >> 12) & 0x3f))) + - chr((0x80 | ((code >> 6) & 0x3f))) 
+ - chr((0x80 | (code & 0x3f)))) - return space.newutf8(item, 1) + if ord(item) >= 0x110000: + raise oefmt(space.w_ValueError, + "array contains a unicode character out of " + "range(0x110000)") + return space.newtext(rutf8.unichr_as_utf8(ord(item)), 1) assert 0, "unreachable" # interface diff --git a/pypy/objspace/std/formatting.py b/pypy/objspace/std/formatting.py --- a/pypy/objspace/std/formatting.py +++ b/pypy/objspace/std/formatting.py @@ -499,6 +499,7 @@ else: s = '' if len(s) == 1: + self.std_wp(s) return raise oefmt(space.w_TypeError, "%c requires int or single byte") else: diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py --- a/pypy/objspace/std/test/test_unicodeobject.py +++ b/pypy/objspace/std/test/test_unicodeobject.py @@ -1276,8 +1276,8 @@ assert type(str(z)) is str assert str(z) == u'foobaz' # - assert unicode(encoding='supposedly_the_encoding') == u'' - assert unicode(errors='supposedly_the_error') == u'' + assert str(encoding='supposedly_the_encoding') == u'' + assert str(errors='supposedly_the_error') == u'' e = raises(TypeError, str, u'', 'supposedly_the_encoding') assert str(e.value) == 'decoding str is not supported' e = raises(TypeError, str, u'', errors='supposedly_the_error') _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit