Author: Matti Picus <[email protected]>
Branch: unicode-utf8
Changeset: r94463:b1f2a7018522
Date: 2018-05-01 23:24 +0300
http://bitbucket.org/pypy/pypy/changeset/b1f2a7018522/

Log:    minimize diff to py3.5

diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py
--- a/pypy/interpreter/unicodehelper.py
+++ b/pypy/interpreter/unicodehelper.py
@@ -1115,25 +1115,48 @@
                                                          errorhandler, "native")
     return result, c, lgt
 
+def py3k_str_decode_utf_32(s, size, errors, final=True,
+                           errorhandler=None):
+    result, c, lgt, _ = str_decode_utf_32_helper(s, errors, final,
+                                 errorhandler, "native", 'utf-32-' + BYTEORDER2,
+        allow_surrogates=False)
+    return result, c, lgt
+
 def str_decode_utf_32_be(s, errors, final=True,
                          errorhandler=None):
     result, c, lgt, _ = str_decode_utf_32_helper(s, errors, final,
                                                          errorhandler, "big")
     return result, c, lgt
 
+def py3k_str_decode_utf_32_be(s, size, errors, final=True,
+                              errorhandler=None):
+    result, c, lgt, _ = str_decode_utf_32_helper(
+        s, errors, final, errorhandler, "big", 'utf-32-be',
+        allow_surrogates=False)
+    return result, c, lgt
+
 def str_decode_utf_32_le(s, errors, final=True,
                          errorhandler=None):
     result, c, lgt, _ = str_decode_utf_32_helper(s, errors, final,
                                                          errorhandler, "little")
     return result, c, lgt
 
+def py3k_str_decode_utf_32_le(s, size, errors, final=True,
+                              errorhandler=None):
+    result, c, lgt, _ = str_decode_utf_32_helper(
+        s, errors, final, errorhandler, "little", 'utf-32-le',
+        allow_surrogates=False)
+    return result, c, lgt
+
 BOM32_DIRECT  = intmask(0x0000FEFF)
 BOM32_REVERSE = intmask(0xFFFE0000)
 
-def str_decode_utf_32_helper(s, errors, final=True,
-                             errorhandler=None,
+def str_decode_utf_32_helper(s, errors, final,
+                             errorhandler,
                              byteorder="native",
-                             public_encoding_name='utf32'):
+                             public_encoding_name='utf32',
+                             allow_surrogates=True):
+    assert errorhandler is not None
     bo = 0
     size = len(s)
 
@@ -1196,14 +1219,21 @@
             continue
         ch = ((ord(s[pos + iorder[3]]) << 24) | (ord(s[pos + iorder[2]]) << 16) |
               (ord(s[pos + iorder[1]]) << 8)  | ord(s[pos + iorder[0]]))
-        if ch >= 0x110000:
+        if not allow_surrogates and 0xD800 <= ch <= 0xDFFF:
+            r, pos = errorhandler(errors, public_encoding_name,
+                                  "code point in surrogate code point "
+                                  "range(0xd800, 0xe000)",
+                                  s, pos, pos + 4)
+            result.append(r)
+            continue
+        elif ch >= 0x110000:
             r, pos = errorhandler(errors, public_encoding_name,
                                   "codepoint not in range(0x110000)",
                                   s, pos, len(s))
             result.append(r)
             continue
 
-        rutf8.unichr_as_utf8_append(result, ch, allow_surrogates=True)
+        rutf8.unichr_as_utf8_append(result, ch, allow_surrogates=allow_surrogates)
         pos += 4
     r = result.build()
     lgt = rutf8.check_utf8(r, True)
@@ -1283,24 +1313,6 @@
     return unicode_encode_utf_32_helper(s, errors, errorhandler,
                                         allow_surrogates, "little")
 
-def py3k_str_decode_utf_32(s, size, errors, final=True,
-                           errorhandler=None):
-    result, length, byteorder = str_decode_utf_32_helper(
-        s, size, errors, final, errorhandler, "native", 'utf-32-' + BYTEORDER2)
-    return result, length
-
-def py3k_str_decode_utf_32_be(s, size, errors, final=True,
-                              errorhandler=None):
-    result, length, byteorder = str_decode_utf_32_helper(
-        s, size, errors, final, errorhandler, "big", 'utf-32-be')
-    return result, length
-
-def py3k_str_decode_utf_32_le(s, size, errors, final=True,
-                              errorhandler=None):
-    result, length, byteorder = str_decode_utf_32_helper(
-        s, size, errors, final, errorhandler, "little", 'utf-32-le')
-    return result, length
-
 def py3k_unicode_encode_utf_32(s, size, errors,
                                errorhandler=None, allow_surrogates=True):
     return unicode_encode_utf_32_helper(s, size, errors, errorhandler,
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to