Author: Matti Picus <[email protected]>
Branch: unicode-utf8
Changeset: r94463:b1f2a7018522
Date: 2018-05-01 23:24 +0300
http://bitbucket.org/pypy/pypy/changeset/b1f2a7018522/
Log: minimize diff to py3.5
diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py
--- a/pypy/interpreter/unicodehelper.py
+++ b/pypy/interpreter/unicodehelper.py
@@ -1115,25 +1115,48 @@
errorhandler,
"native")
return result, c, lgt
+def py3k_str_decode_utf_32(s, size, errors, final=True,
+ errorhandler=None):
+ result, c, lgt, _ = str_decode_utf_32_helper(s, errors, final,
+ errorhandler, "native", 'utf-32-' + BYTEORDER2,
+ allow_surrogates=False)
+ return result, c, lgt
+
def str_decode_utf_32_be(s, errors, final=True,
errorhandler=None):
result, c, lgt, _ = str_decode_utf_32_helper(s, errors, final,
errorhandler, "big")
return result, c, lgt
+def py3k_str_decode_utf_32_be(s, size, errors, final=True,
+ errorhandler=None):
+ result, c, lgt, _ = str_decode_utf_32_helper(
+ s, errors, final, errorhandler, "big", 'utf-32-be',
+ allow_surrogates=False)
+ return result, c, lgt
+
def str_decode_utf_32_le(s, errors, final=True,
errorhandler=None):
result, c, lgt, _ = str_decode_utf_32_helper(s, errors, final,
errorhandler,
"little")
return result, c, lgt
+def py3k_str_decode_utf_32_le(s, size, errors, final=True,
+ errorhandler=None):
+ result, c, lgt, _ = str_decode_utf_32_helper(
+ s, errors, final, errorhandler, "little", 'utf-32-le',
+ allow_surrogates=False)
+ return result, c, lgt
+
BOM32_DIRECT = intmask(0x0000FEFF)
BOM32_REVERSE = intmask(0xFFFE0000)
-def str_decode_utf_32_helper(s, errors, final=True,
- errorhandler=None,
+def str_decode_utf_32_helper(s, errors, final,
+ errorhandler,
byteorder="native",
- public_encoding_name='utf32'):
+ public_encoding_name='utf32',
+ allow_surrogates=True):
+ assert errorhandler is not None
bo = 0
size = len(s)
@@ -1196,14 +1219,21 @@
continue
ch = ((ord(s[pos + iorder[3]]) << 24) | (ord(s[pos + iorder[2]]) << 16) |
(ord(s[pos + iorder[1]]) << 8) | ord(s[pos + iorder[0]]))
- if ch >= 0x110000:
+ if not allow_surrogates and 0xD800 <= ch <= 0xDFFF:
+ r, pos = errorhandler(errors, public_encoding_name,
+ "code point in surrogate code point "
+ "range(0xd800, 0xe000)",
+ s, pos, pos + 4)
+ result.append(r)
+ continue
+ elif ch >= 0x110000:
r, pos = errorhandler(errors, public_encoding_name,
"codepoint not in range(0x110000)",
s, pos, len(s))
result.append(r)
continue
- rutf8.unichr_as_utf8_append(result, ch, allow_surrogates=True)
+ rutf8.unichr_as_utf8_append(result, ch, allow_surrogates=allow_surrogates)
pos += 4
r = result.build()
lgt = rutf8.check_utf8(r, True)
@@ -1283,24 +1313,6 @@
return unicode_encode_utf_32_helper(s, errors, errorhandler,
allow_surrogates, "little")
-def py3k_str_decode_utf_32(s, size, errors, final=True,
- errorhandler=None):
- result, length, byteorder = str_decode_utf_32_helper(
- s, size, errors, final, errorhandler, "native", 'utf-32-' + BYTEORDER2)
- return result, length
-
-def py3k_str_decode_utf_32_be(s, size, errors, final=True,
- errorhandler=None):
- result, length, byteorder = str_decode_utf_32_helper(
- s, size, errors, final, errorhandler, "big", 'utf-32-be')
- return result, length
-
-def py3k_str_decode_utf_32_le(s, size, errors, final=True,
- errorhandler=None):
- result, length, byteorder = str_decode_utf_32_helper(
- s, size, errors, final, errorhandler, "little", 'utf-32-le')
- return result, length
-
def py3k_unicode_encode_utf_32(s, size, errors,
errorhandler=None, allow_surrogates=True):
return unicode_encode_utf_32_helper(s, size, errors, errorhandler,
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit