Author: Armin Rigo <[email protected]>
Branch: py3.5
Changeset: r93668:4d7c200aa842
Date: 2018-01-15 12:40 +0100
http://bitbucket.org/pypy/pypy/changeset/4d7c200aa842/
Log: Re-enable raw_encode_basestring_ascii(). Not used so far
diff --git a/pypy/module/_pypyjson/interp_encoder.py b/pypy/module/_pypyjson/interp_encoder.py
--- a/pypy/module/_pypyjson/interp_encoder.py
+++ b/pypy/module/_pypyjson/interp_encoder.py
@@ -1,6 +1,7 @@
from rpython.rlib.rstring import StringBuilder
from rpython.rlib.runicode import str_decode_utf_8
from pypy.interpreter import unicodehelper
+from pypy.interpreter.gateway import unwrap_spec
HEX = '0123456789abcdef'
@@ -16,39 +17,21 @@
for _i in range(32)]
-def raw_encode_basestring_ascii(space, w_string):
- if space.isinstance_w(w_string, space.w_bytes):
- s = space.bytes_w(w_string)
- for i in range(len(s)):
- c = s[i]
- if c >= ' ' and c <= '~' and c != '"' and c != '\\':
- pass
- else:
- first = i
- break
- else:
- # the input is a string with only non-special ascii chars
- return w_string
+@unwrap_spec(u=unicode)
+def raw_encode_basestring_ascii(space, u):
+ for i in range(len(u)):
+ c = ord(u[i])
+ if c < 32 or c > 126 or c == ord('\\') or c == ord('"'):
+ break
+ else:
+ # The unicode string 'u' contains only safe characters.
+ # Return None to mean this.
+ return space.w_None
- eh = unicodehelper.decode_error_handler(space)
- u = str_decode_utf_8(
- s, len(s), None, final=True, errorhandler=eh,
- allow_surrogates=True)[0]
- sb = StringBuilder(len(u))
- sb.append_slice(s, 0, first)
- else:
- # We used to check if 'u' contains only safe characters, and return
- # 'w_string' directly. But this requires an extra pass over all
- # characters, and the expected use case of this function, from
- # json.encoder, will anyway re-encode a unicode result back to
- # a string (with the ascii encoding). This requires two passes
- # over the characters. So we may as well directly turn it into a
- # string here --- only one pass.
- u = space.unicode_w(w_string)
- sb = StringBuilder(len(u))
- first = 0
+ sb = StringBuilder(len(u) + 20)
+ sb.append('"')
- for i in range(first, len(u)):
+ for i in range(len(u)):
c = ord(u[i])
if c <= ord('~'):
if c == ord('"') or c == ord('\\'):
@@ -78,5 +61,6 @@
sb.append(HEX[(s2 >> 4) & 0x0f])
sb.append(HEX[s2 & 0x0f])
+ sb.append('"')
res = sb.build()
return space.newtext(res)
diff --git a/pypy/module/_pypyjson/test/test__pypyjson.py b/pypy/module/_pypyjson/test/test__pypyjson.py
--- a/pypy/module/_pypyjson/test/test__pypyjson.py
+++ b/pypy/module/_pypyjson/test/test__pypyjson.py
@@ -213,19 +213,20 @@
raises(ValueError, "_pypyjson.loads(s)")
def test_raw_encode_basestring_ascii(self):
- py3k_skip("XXX: needs porting to py3k")
import _pypyjson
- def check(s):
- s = _pypyjson.raw_encode_basestring_ascii(s)
+ def check(inp_s):
+ s = _pypyjson.raw_encode_basestring_ascii(inp_s)
+ if s is None:
+ return inp_s
assert type(s) is str
- return s
+ assert len(s) >= 2 and s.startswith('"') and s.endswith('"')
+ return s[1:-1]
assert check("") == ""
assert check(u"") == ""
assert check("abc ") == "abc "
assert check(u"abc ") == "abc "
- raises(UnicodeDecodeError, check, "\xc0")
- assert check("\xc2\x84") == "\\u0084"
- assert check("\xf0\x92\x8d\x85") == "\\ud808\\udf45"
+ assert check("\xc0") == "\\u00c0"
+ assert check("\xc2\x84") == "\\u00c2\\u0084"
assert check(u"\ud808\udf45") == "\\ud808\\udf45"
assert check(u"\U00012345") == "\\ud808\\udf45"
assert check("a\"c") == "a\\\"c"
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit