Author: Matti Picus <[email protected]>
Branch: unicode-utf8
Changeset: r95554:962719fced4a
Date: 2019-01-01 15:06 +0200
http://bitbucket.org/pypy/pypy/changeset/962719fced4a/
Log: allow array.array('u') items whose value exceeds sys.maxunicode, e.g.
a = array.array('u', u'\xff'); a.byteswap(); ord(a[0]) > sys.maxunicode
diff --git a/pypy/module/array/interp_array.py
b/pypy/module/array/interp_array.py
--- a/pypy/module/array/interp_array.py
+++ b/pypy/module/array/interp_array.py
@@ -1040,13 +1040,24 @@
return space.newbytes(item)
elif mytype.typecode == 'u':
code = r_uint(ord(item))
- try:
- return space.newutf8(rutf8.unichr_as_utf8(code), 1)
- except ValueError:
- raise oefmt(space.w_ValueError,
- "array contains a 32-bit integer that is outside "
- "the range [U+0000; U+10ffff] of valid unicode "
- "characters")
+ # CPython allows values > sys.maxunicode here,
+ # silently truncating the top bits
+ if code <= r_uint(0x7F):
+ # Encode ASCII
+ item = chr(code)
+ elif code <= r_uint(0x07FF):
+ item = (chr((0xc0 | (code >> 6))) +
+ chr((0x80 | (code & 0x3f))))
+ elif code <= r_uint(0xFFFF):
+ item = (chr((0xe0 | (code >> 12))) +
+ chr((0x80 | ((code >> 6) & 0x3f))) +
+ chr((0x80 | (code & 0x3f))))
+ else:
+ item = (chr((0xf0 | (code >> 18)) & 0xff) +
+ chr((0x80 | ((code >> 12) & 0x3f))) +
+ chr((0x80 | ((code >> 6) & 0x3f))) +
+ chr((0x80 | (code & 0x3f))))
+ return space.newutf8(item, 1)
assert 0, "unreachable"
# interface
diff --git a/pypy/objspace/std/unicodeobject.py
b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -44,8 +44,12 @@
# XXX checking, remove before any performance measurments
# ifdef not_running_in_benchmark
if not we_are_translated():
- lgt = rutf8.check_utf8(utf8str, True)
- assert lgt == length
+ try:
+ lgt = rutf8.check_utf8(utf8str, True)
+ assert lgt == length
+ except:
+ # array.array can return invalid unicode
+ pass
@staticmethod
def from_utf8builder(builder):
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit