Author: Armin Rigo <ar...@tunes.org>
Branch: cffi-char16-char32
Changeset: r91509:649a8f742c90
Date: 2017-06-04 14:25 +0200
http://bitbucket.org/pypy/pypy/changeset/649a8f742c90/
Log: Detect and complain about unicode "characters" that are greater than 0x10FFFF when attempting to convert to a pair of surrogates diff --git a/pypy/module/_cffi_backend/ctypeprim.py b/pypy/module/_cffi_backend/ctypeprim.py --- a/pypy/module/_cffi_backend/ctypeprim.py +++ b/pypy/module/_cffi_backend/ctypeprim.py @@ -179,7 +179,12 @@ return self.space.newunicode(unichardata[0]) else: value = misc.read_raw_ulong_data(cdata, self.size) # r_uint - u = wchar_helper.ordinal_to_unicode(value) + try: + u = wchar_helper.ordinal_to_unicode(value) + except wchar_helper.OutOfRange as e: + raise oefmt(self.space.w_ValueError, + "char32_t out of range for " + "conversion to unicode: %s", hex(e.ordinal)) return self.space.newunicode(u) def string(self, cdataobj, maxlen): diff --git a/pypy/module/_cffi_backend/ctypeptr.py b/pypy/module/_cffi_backend/ctypeptr.py --- a/pypy/module/_cffi_backend/ctypeptr.py +++ b/pypy/module/_cffi_backend/ctypeptr.py @@ -102,7 +102,12 @@ "(got %d characters)", self.name, n) add_final_zero = (n != self.length) if self.ctitem.size == 2: - wchar_helper.unicode_to_char16(s, cdata, n, add_final_zero) + try: + wchar_helper.unicode_to_char16(s, cdata, n, add_final_zero) + except wchar_helper.OutOfRange as e: + raise oefmt(self.space.w_ValueError, + "unicode character ouf of range for " + "conversion to char16_t: %s", hex(e.ordinal)) else: wchar_helper.unicode_to_char32(s, cdata, n, add_final_zero) else: diff --git a/pypy/module/_cffi_backend/wchar_helper.py b/pypy/module/_cffi_backend/wchar_helper.py --- a/pypy/module/_cffi_backend/wchar_helper.py +++ b/pypy/module/_cffi_backend/wchar_helper.py @@ -14,10 +14,12 @@ def ordinal_to_unicode(ordinal): # 'ordinal' is a r_uint if ordinal <= 0xffff: return unichr(intmask(ordinal)) - else: + elif ordinal <= 0x10ffff: ordinal = intmask(ordinal - 0x10000) return (unichr(0xD800 | (ordinal >> 10)) + unichr(0xDC00 | (ordinal & 0x3FF))) + else: + raise OutOfRange(ordinal) def is_surrogate(u, index): return 
(unichr(0xD800) <= u[index + 0] <= unichr(0xDBFF) and @@ -174,8 +176,8 @@ for uc in u: ordinal = ord(uc) if ordinal > 0xFFFF: - # NB. like CPython, ignore the problem of unicode string - # objects containing characters greater than sys.maxunicode + if ordinal > 0x10FFFF: + raise OutOfRange(ordinal) ordinal -= 0x10000 ptr[0] = rffi.cast(rffi.USHORT, 0xD800 | (ordinal >> 10)) ptr[1] = rffi.cast(rffi.USHORT, 0xDC00 | (ordinal & 0x3FF)) _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit