Author: Ronan Lamy <[email protected]>
Branch: stricter-encode
Changeset: r90684:2ba11ddd00dc
Date: 2017-03-14 14:39 +0000
http://bitbucket.org/pypy/pypy/changeset/2ba11ddd00dc/

Log:    Raise error when attempting to encode surrogates to UTF16 or UTF32

diff --git a/rpython/rlib/runicode.py b/rpython/rlib/runicode.py
--- a/rpython/rlib/runicode.py
+++ b/rpython/rlib/runicode.py
@@ -609,6 +609,9 @@
         ch = ord(s[i])
         i += 1
         ch2 = 0
+        if 0xD800 <= ch < 0xDFFF:
+            errorhandler(
+                errors, 'utf16', 'surrogates not allowed', s, i - 1, i)
         if ch >= 0x10000:
             ch2 = 0xDC00 | ((ch-0x10000) & 0x3FF)
             ch  = 0xD800 | ((ch-0x10000) >> 10)
@@ -774,6 +777,9 @@
         ch = ord(s[i])
         i += 1
         ch2 = 0
+        if 0xD800 <= ch < 0xDFFF:
+            errorhandler(
+                errors, 'utf32', 'surrogates not allowed', s, i - 1, i)
         if MAXUNICODE < 65536 and 0xD800 <= ch <= 0xDBFF and i < size:
             ch2 = ord(s[i])
             if 0xDC00 <= ch2 <= 0xDFFF:
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to