Author: Armin Rigo <ar...@tunes.org>
Branch: py3.5
Changeset: r93668:4d7c200aa842
Date: 2018-01-15 12:40 +0100
http://bitbucket.org/pypy/pypy/changeset/4d7c200aa842/
Log: Re-enable raw_encode_basestring_ascii(). Not used so far diff --git a/pypy/module/_pypyjson/interp_encoder.py b/pypy/module/_pypyjson/interp_encoder.py --- a/pypy/module/_pypyjson/interp_encoder.py +++ b/pypy/module/_pypyjson/interp_encoder.py @@ -1,6 +1,7 @@ from rpython.rlib.rstring import StringBuilder from rpython.rlib.runicode import str_decode_utf_8 from pypy.interpreter import unicodehelper +from pypy.interpreter.gateway import unwrap_spec HEX = '0123456789abcdef' @@ -16,39 +17,21 @@ for _i in range(32)] -def raw_encode_basestring_ascii(space, w_string): - if space.isinstance_w(w_string, space.w_bytes): - s = space.bytes_w(w_string) - for i in range(len(s)): - c = s[i] - if c >= ' ' and c <= '~' and c != '"' and c != '\\': - pass - else: - first = i - break - else: - # the input is a string with only non-special ascii chars - return w_string +@unwrap_spec(u=unicode) +def raw_encode_basestring_ascii(space, u): + for i in range(len(u)): + c = ord(u[i]) + if c < 32 or c > 126 or c == ord('\\') or c == ord('"'): + break + else: + # The unicode string 'u' contains only safe characters. + # Return None to mean this. + return space.w_None - eh = unicodehelper.decode_error_handler(space) - u = str_decode_utf_8( - s, len(s), None, final=True, errorhandler=eh, - allow_surrogates=True)[0] - sb = StringBuilder(len(u)) - sb.append_slice(s, 0, first) - else: - # We used to check if 'u' contains only safe characters, and return - # 'w_string' directly. But this requires an extra pass over all - # characters, and the expected use case of this function, from - # json.encoder, will anyway re-encode a unicode result back to - # a string (with the ascii encoding). This requires two passes - # over the characters. So we may as well directly turn it into a - # string here --- only one pass. 
- u = space.unicode_w(w_string) - sb = StringBuilder(len(u)) - first = 0 + sb = StringBuilder(len(u) + 20) + sb.append('"') - for i in range(first, len(u)): + for i in range(len(u)): c = ord(u[i]) if c <= ord('~'): if c == ord('"') or c == ord('\\'): @@ -78,5 +61,6 @@ sb.append(HEX[(s2 >> 4) & 0x0f]) sb.append(HEX[s2 & 0x0f]) + sb.append('"') res = sb.build() return space.newtext(res) diff --git a/pypy/module/_pypyjson/test/test__pypyjson.py b/pypy/module/_pypyjson/test/test__pypyjson.py --- a/pypy/module/_pypyjson/test/test__pypyjson.py +++ b/pypy/module/_pypyjson/test/test__pypyjson.py @@ -213,19 +213,20 @@ raises(ValueError, "_pypyjson.loads(s)") def test_raw_encode_basestring_ascii(self): - py3k_skip("XXX: needs porting to py3k") import _pypyjson - def check(s): - s = _pypyjson.raw_encode_basestring_ascii(s) + def check(inp_s): + s = _pypyjson.raw_encode_basestring_ascii(inp_s) + if s is None: + return inp_s assert type(s) is str - return s + assert len(s) >= 2 and s.startswith('"') and s.endswith('"') + return s[1:-1] assert check("") == "" assert check(u"") == "" assert check("abc ") == "abc " assert check(u"abc ") == "abc " - raises(UnicodeDecodeError, check, "\xc0") - assert check("\xc2\x84") == "\\u0084" - assert check("\xf0\x92\x8d\x85") == "\\ud808\\udf45" + assert check("\xc0") == "\\u00c0" + assert check("\xc2\x84") == "\\u00c2\\u0084" assert check(u"\ud808\udf45") == "\\ud808\\udf45" assert check(u"\U00012345") == "\\ud808\\udf45" assert check("a\"c") == "a\\\"c" _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit