[pypy-commit] pypy cffi-char16-char32: Detect and complain about unicode "characters" that are greater than

arigo Sun, 04 Jun 2017 05:27:52 -0700

Author: Armin Rigo <[email protected]>
Branch: cffi-char16-char32
Changeset: r91509:649a8f742c90
Date: 2017-06-04 14:25 +0200
http://bitbucket.org/pypy/pypy/changeset/649a8f742c90/


Log:    Detect and complain about unicode "characters" that are greater than
        0x10FFFF when attempting to convert to a pair of surrogates

diff --git a/pypy/module/_cffi_backend/ctypeprim.py b/pypy/module/_cffi_backend/ctypeprim.py
--- a/pypy/module/_cffi_backend/ctypeprim.py
+++ b/pypy/module/_cffi_backend/ctypeprim.py
@@ -179,7 +179,12 @@
             return self.space.newunicode(unichardata[0])
         else:
             value = misc.read_raw_ulong_data(cdata, self.size)   # r_uint
-            u = wchar_helper.ordinal_to_unicode(value)
+            try:
+                u = wchar_helper.ordinal_to_unicode(value)
+            except wchar_helper.OutOfRange as e:
+                raise oefmt(self.space.w_ValueError,
+                            "char32_t out of range for "
+                            "conversion to unicode: %s", hex(e.ordinal))
             return self.space.newunicode(u)
 
     def string(self, cdataobj, maxlen):
diff --git a/pypy/module/_cffi_backend/ctypeptr.py b/pypy/module/_cffi_backend/ctypeptr.py
--- a/pypy/module/_cffi_backend/ctypeptr.py
+++ b/pypy/module/_cffi_backend/ctypeptr.py
@@ -102,7 +102,12 @@
                             "(got %d characters)", self.name, n)
             add_final_zero = (n != self.length)
             if self.ctitem.size == 2:
-                wchar_helper.unicode_to_char16(s, cdata, n, add_final_zero)
+                try:
+                    wchar_helper.unicode_to_char16(s, cdata, n, add_final_zero)
+                except wchar_helper.OutOfRange as e:
+                    raise oefmt(self.space.w_ValueError,
+                                "unicode character ouf of range for "
+                                "conversion to char16_t: %s", hex(e.ordinal))
             else:
                 wchar_helper.unicode_to_char32(s, cdata, n, add_final_zero)
         else:
diff --git a/pypy/module/_cffi_backend/wchar_helper.py b/pypy/module/_cffi_backend/wchar_helper.py
--- a/pypy/module/_cffi_backend/wchar_helper.py
+++ b/pypy/module/_cffi_backend/wchar_helper.py
@@ -14,10 +14,12 @@
     def ordinal_to_unicode(ordinal):    # 'ordinal' is a r_uint
         if ordinal <= 0xffff:
             return unichr(intmask(ordinal))
-        else:
+        elif ordinal <= 0x10ffff:
             ordinal = intmask(ordinal - 0x10000)
             return (unichr(0xD800 | (ordinal >> 10)) +
                     unichr(0xDC00 | (ordinal & 0x3FF)))
+        else:
+            raise OutOfRange(ordinal)
 
 def is_surrogate(u, index):
     return (unichr(0xD800) <= u[index + 0] <= unichr(0xDBFF) and
@@ -174,8 +176,8 @@
         for uc in u:
             ordinal = ord(uc)
             if ordinal > 0xFFFF:
-                # NB. like CPython, ignore the problem of unicode string
-                # objects containing characters greater than sys.maxunicode
+                if ordinal > 0x10FFFF:
+                    raise OutOfRange(ordinal)
                 ordinal -= 0x10000
                 ptr[0] = rffi.cast(rffi.USHORT, 0xD800 | (ordinal >> 10))
                 ptr[1] = rffi.cast(rffi.USHORT, 0xDC00 | (ordinal & 0x3FF))
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit

[pypy-commit] pypy cffi-char16-char32: Detect and complain about unicode "characters" that are greater than

Reply via email to