Author: Ronan Lamy <[email protected]>
Branch: stricter-encode
Changeset: r90684:2ba11ddd00dc
Date: 2017-03-14 14:39 +0000
http://bitbucket.org/pypy/pypy/changeset/2ba11ddd00dc/
Log: Raise error when attempting to encode surrogates to UTF16 or UTF32
diff --git a/rpython/rlib/runicode.py b/rpython/rlib/runicode.py
--- a/rpython/rlib/runicode.py
+++ b/rpython/rlib/runicode.py
@@ -609,6 +609,9 @@
ch = ord(s[i])
i += 1
ch2 = 0
+ if 0xD800 <= ch < 0xDFFF:
+ errorhandler(
+ errors, 'utf16', 'surrogates not allowed', s, i - 1, i)
if ch >= 0x10000:
ch2 = 0xDC00 | ((ch-0x10000) & 0x3FF)
ch = 0xD800 | ((ch-0x10000) >> 10)
@@ -774,6 +777,9 @@
ch = ord(s[i])
i += 1
ch2 = 0
+ if 0xD800 <= ch < 0xDFFF:
+ errorhandler(
+ errors, 'utf32', 'surrogates not allowed', s, i - 1, i)
if MAXUNICODE < 65536 and 0xD800 <= ch <= 0xDBFF and i < size:
ch2 = ord(s[i])
if 0xDC00 <= ch2 <= 0xDFFF:
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit