Author: Armin Rigo <[email protected]>
Branch: cffi-char16-char32
Changeset: r91509:649a8f742c90
Date: 2017-06-04 14:25 +0200
http://bitbucket.org/pypy/pypy/changeset/649a8f742c90/
Log: Detect and complain about unicode "characters" that are greater than
0x10FFFF when attempting to convert to a pair of surrogates
diff --git a/pypy/module/_cffi_backend/ctypeprim.py
b/pypy/module/_cffi_backend/ctypeprim.py
--- a/pypy/module/_cffi_backend/ctypeprim.py
+++ b/pypy/module/_cffi_backend/ctypeprim.py
@@ -179,7 +179,12 @@
return self.space.newunicode(unichardata[0])
else:
value = misc.read_raw_ulong_data(cdata, self.size) # r_uint
- u = wchar_helper.ordinal_to_unicode(value)
+ try:
+ u = wchar_helper.ordinal_to_unicode(value)
+ except wchar_helper.OutOfRange as e:
+ raise oefmt(self.space.w_ValueError,
+ "char32_t out of range for "
+ "conversion to unicode: %s", hex(e.ordinal))
return self.space.newunicode(u)
def string(self, cdataobj, maxlen):
diff --git a/pypy/module/_cffi_backend/ctypeptr.py
b/pypy/module/_cffi_backend/ctypeptr.py
--- a/pypy/module/_cffi_backend/ctypeptr.py
+++ b/pypy/module/_cffi_backend/ctypeptr.py
@@ -102,7 +102,12 @@
"(got %d characters)", self.name, n)
add_final_zero = (n != self.length)
if self.ctitem.size == 2:
- wchar_helper.unicode_to_char16(s, cdata, n, add_final_zero)
+ try:
+ wchar_helper.unicode_to_char16(s, cdata, n, add_final_zero)
+ except wchar_helper.OutOfRange as e:
+ raise oefmt(self.space.w_ValueError,
+ "unicode character out of range for "
+ "conversion to char16_t: %s", hex(e.ordinal))
else:
wchar_helper.unicode_to_char32(s, cdata, n, add_final_zero)
else:
diff --git a/pypy/module/_cffi_backend/wchar_helper.py
b/pypy/module/_cffi_backend/wchar_helper.py
--- a/pypy/module/_cffi_backend/wchar_helper.py
+++ b/pypy/module/_cffi_backend/wchar_helper.py
@@ -14,10 +14,12 @@
def ordinal_to_unicode(ordinal): # 'ordinal' is a r_uint
if ordinal <= 0xffff:
return unichr(intmask(ordinal))
- else:
+ elif ordinal <= 0x10ffff:
ordinal = intmask(ordinal - 0x10000)
return (unichr(0xD800 | (ordinal >> 10)) +
unichr(0xDC00 | (ordinal & 0x3FF)))
+ else:
+ raise OutOfRange(ordinal)
def is_surrogate(u, index):
return (unichr(0xD800) <= u[index + 0] <= unichr(0xDBFF) and
@@ -174,8 +176,8 @@
for uc in u:
ordinal = ord(uc)
if ordinal > 0xFFFF:
- # NB. like CPython, ignore the problem of unicode string
- # objects containing characters greater than sys.maxunicode
+ if ordinal > 0x10FFFF:
+ raise OutOfRange(ordinal)
ordinal -= 0x10000
ptr[0] = rffi.cast(rffi.USHORT, 0xD800 | (ordinal >> 10))
ptr[1] = rffi.cast(rffi.USHORT, 0xDC00 | (ordinal & 0x3FF))
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit