[pypy-commit] pypy py3.5: Re-enable raw_encode_basestring_ascii(). Not used so far

arigo Mon, 15 Jan 2018 03:41:52 -0800

Author: Armin Rigo <ar...@tunes.org>
Branch: py3.5
Changeset: r93668:4d7c200aa842
Date: 2018-01-15 12:40 +0100
http://bitbucket.org/pypy/pypy/changeset/4d7c200aa842/


Log:    Re-enable raw_encode_basestring_ascii(). Not used so far

diff --git a/pypy/module/_pypyjson/interp_encoder.py b/pypy/module/_pypyjson/interp_encoder.py
--- a/pypy/module/_pypyjson/interp_encoder.py
+++ b/pypy/module/_pypyjson/interp_encoder.py
@@ -1,6 +1,7 @@
 from rpython.rlib.rstring import StringBuilder
 from rpython.rlib.runicode import str_decode_utf_8
 from pypy.interpreter import unicodehelper
+from pypy.interpreter.gateway import unwrap_spec
 
 
 HEX = '0123456789abcdef'
@@ -16,39 +17,21 @@
                        for _i in range(32)]
 
 
-def raw_encode_basestring_ascii(space, w_string):
-    if space.isinstance_w(w_string, space.w_bytes):
-        s = space.bytes_w(w_string)
-        for i in range(len(s)):
-            c = s[i]
-            if c >= ' ' and c <= '~' and c != '"' and c != '\\':
-                pass
-            else:
-                first = i
-                break
-        else:
-            # the input is a string with only non-special ascii chars
-            return w_string
+@unwrap_spec(u=unicode)
+def raw_encode_basestring_ascii(space, u):
+    for i in range(len(u)):
+        c = ord(u[i])
+        if c < 32 or c > 126 or c == ord('\\') or c == ord('"'):
+            break
+    else:
+        # The unicode string 'u' contains only safe characters.
+        # Return None to mean this.
+        return space.w_None
 
-        eh = unicodehelper.decode_error_handler(space)
-        u = str_decode_utf_8(
-                s, len(s), None, final=True, errorhandler=eh,
-                allow_surrogates=True)[0]
-        sb = StringBuilder(len(u))
-        sb.append_slice(s, 0, first)
-    else:
-        # We used to check if 'u' contains only safe characters, and return
-        # 'w_string' directly.  But this requires an extra pass over all
-        # characters, and the expected use case of this function, from
-        # json.encoder, will anyway re-encode a unicode result back to
-        # a string (with the ascii encoding).  This requires two passes
-        # over the characters.  So we may as well directly turn it into a
-        # string here --- only one pass.
-        u = space.unicode_w(w_string)
-        sb = StringBuilder(len(u))
-        first = 0
+    sb = StringBuilder(len(u) + 20)
+    sb.append('"')
 
-    for i in range(first, len(u)):
+    for i in range(len(u)):
         c = ord(u[i])
         if c <= ord('~'):
             if c == ord('"') or c == ord('\\'):
@@ -78,5 +61,6 @@
                 sb.append(HEX[(s2 >> 4) & 0x0f])
                 sb.append(HEX[s2 & 0x0f])
 
+    sb.append('"')
     res = sb.build()
     return space.newtext(res)
diff --git a/pypy/module/_pypyjson/test/test__pypyjson.py b/pypy/module/_pypyjson/test/test__pypyjson.py
--- a/pypy/module/_pypyjson/test/test__pypyjson.py
+++ b/pypy/module/_pypyjson/test/test__pypyjson.py
@@ -213,19 +213,20 @@
         raises(ValueError, "_pypyjson.loads(s)")
 
     def test_raw_encode_basestring_ascii(self):
-        py3k_skip("XXX: needs porting to py3k")
         import _pypyjson
-        def check(s):
-            s = _pypyjson.raw_encode_basestring_ascii(s)
+        def check(inp_s):
+            s = _pypyjson.raw_encode_basestring_ascii(inp_s)
+            if s is None:
+                return inp_s
             assert type(s) is str
-            return s
+            assert len(s) >= 2 and s.startswith('"') and s.endswith('"')
+            return s[1:-1]
         assert check("") == ""
         assert check(u"") == ""
         assert check("abc ") == "abc "
         assert check(u"abc ") == "abc "
-        raises(UnicodeDecodeError, check, "\xc0")
-        assert check("\xc2\x84") == "\\u0084"
-        assert check("\xf0\x92\x8d\x85") == "\\ud808\\udf45"
+        assert check("\xc0") == "\\u00c0"
+        assert check("\xc2\x84") == "\\u00c2\\u0084"
         assert check(u"\ud808\udf45") == "\\ud808\\udf45"
         assert check(u"\U00012345") == "\\ud808\\udf45"
         assert check("a\"c") == "a\\\"c"
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit

[pypy-commit] pypy py3.5: Re-enable raw_encode_basestring_ascii(). Not used so far

Reply via email to