Author: Antonio Cuni <[email protected]>
Branch: fastjson
Changeset: r64770:9e8497c271b6
Date: 2013-06-04 16:20 +0200
http://bitbucket.org/pypy/pypy/changeset/9e8497c271b6/
Log: add support for escape sequences and complain if the string is non
terminated
diff --git a/pypy/module/_fastjson/interp_decoder.py
b/pypy/module/_fastjson/interp_decoder.py
--- a/pypy/module/_fastjson/interp_decoder.py
+++ b/pypy/module/_fastjson/interp_decoder.py
@@ -1,3 +1,4 @@
+from rpython.rlib.rstring import StringBuilder
from pypy.interpreter.error import OperationError, operationerrfmt
from pypy.interpreter.gateway import unwrap_spec
from pypy.interpreter import unicodehelper
@@ -26,6 +27,9 @@
self.i += 1
return ch
+ def unget(self):
+ # Move the read index back by one position. decode_string calls this
+ # after consuming a backslash so that decode_string_escaped sees the
+ # backslash again as its first character.
+ self.i -= 1
+
def skip_whitespace(self):
while not self.eof():
ch = self.peek()
@@ -42,21 +46,65 @@
else:
assert False, 'Unkown char: %s' % ch
+ def getslice(self, start, end):
+ # Return self.s[start:end]. The callers in decode_string pass
+ # end = self.i-1, the index of the character just consumed, so that
+ # character is left out of the result.
+ # NOTE(review): the assert presumably lets the RPython annotator prove
+ # that the slice stop is non-negative -- confirm.
+ assert end > 0
+ return self.s[start:end]
+
def decode_string(self):
+ # Decode a string literal and return it wrapped as a unicode object.
+ # The first next() discards the current character (presumably the
+ # opening quote -- confirm against the caller); 'start' is therefore the
+ # index of the first content character, and that is the value reported
+ # in the "Unterminated string" error below.
self.next()
start = self.i
- while True:
+ while not self.eof():
+ # this loop is a fast path for strings which do not contain escape
+ # characters
ch = self.next()
if ch == '"':
- end = self.i-1
- assert end > 0
- content = self.s[start:end]
+ content_utf8 = self.getslice(start, self.i-1)
+ content_unicode = unicodehelper.decode_utf8(self.space,
content_utf8)
self.last_type = TYPE_STRING
- return self.space.wrap(unicodehelper.decode_utf8(self.space,
content))
+ return self.space.wrap(content_unicode)
elif ch == '\\':
- raise Exception("escaped strings not supported yet")
+ # first escape found: hand the text scanned so far to the slow path,
+ # un-reading the backslash so the slow path consumes it itself
+ content_so_far = self.getslice(start, self.i-1)
+ self.unget()
+ return self.decode_string_escaped(start, content_so_far)
+ # input exhausted without finding the closing quote
+ raise operationerrfmt(self.space.w_ValueError,
+ "Unterminated string starting at char %d", start)
+    def decode_string_escaped(self, start, content_so_far):
+        # Slow path of decode_string, entered when the first backslash is
+        # found.
+        #
+        # start: index of the first content character of the string; only
+        #        used in the error message if the string is never closed.
+        # content_so_far: the text the fast path already scanned, i.e.
+        #        everything before the first backslash.
+        #
+        # Returns the decoded string wrapped as a unicode object, or raises
+        # ValueError (via operationerrfmt) if the input ends before the
+        # closing quote.
+        builder = StringBuilder(len(content_so_far)*2) # just an estimate
+        builder.append(content_so_far)
+        while not self.eof():
+            ch = self.next()
+            if ch == '"':
+                content_utf8 = builder.build()
+                content_unicode = unicodehelper.decode_utf8(self.space,
+                                                            content_utf8)
+                self.last_type = TYPE_STRING
+                return self.space.wrap(content_unicode)
+            elif ch == '\\':
+                newchar = self.decode_escape_sequence()
+                # we should implement append_char
+                builder.append_multiple_char(newchar, 1)
+            else:
+                # ordinary character: copy it through unchanged.  (This used
+                # to append 'newchar', which repeated the previously decoded
+                # escape instead of the character just read.)
+                builder.append_multiple_char(ch, 1)
+
+        raise operationerrfmt(self.space.w_ValueError,
+                              "Unterminated string starting at char %d", start)
+
+ def decode_escape_sequence(self):
+ # Read the character that follows a backslash (the backslash itself has
+ # already been consumed by decode_string_escaped) and return the single
+ # character it stands for. Raises ValueError, via operationerrfmt, for
+ # an unknown escape letter.
+ # NOTE(review): \uXXXX is not handled yet; it hits the assert below
+ # instead of raising ValueError.
+ ch = self.next()
+ if ch == '\\': return '\\'
+ elif ch == '"': return '"'
+ elif ch == '/': return '/'
+ elif ch == 'b': return '\b'
+ elif ch == 'f': return '\f'
+ elif ch == 'n': return '\n'
+ elif ch == 'r': return '\r'
+ elif ch == 't': return '\t'
+ elif ch == 'u':
+ assert False, 'not implemented yet'
+ else:
+ # self.i-1 is the index of the offending escape letter just read
+ raise operationerrfmt(self.space.w_ValueError,
+ "Invalid \\escape: %s (char %d)", ch,
self.i-1)
+
@unwrap_spec(s=str)
def loads(space, s):
decoder = JSONDecoder(space, s)
diff --git a/pypy/module/_fastjson/test/test__fastjson.py
b/pypy/module/_fastjson/test/test__fastjson.py
--- a/pypy/module/_fastjson/test/test__fastjson.py
+++ b/pypy/module/_fastjson/test/test__fastjson.py
@@ -37,3 +37,19 @@
assert _fastjson.loads(s) == u'hello'
s = ' "hello" extra'
raises(ValueError, "_fastjson.loads(s)")
+
+    def test_unterminated_string(self):
+        # An unterminated string must raise ValueError on both decoding
+        # paths: the fast path (no escapes) and the slow path that takes
+        # over once a backslash has been seen.
+        import _fastjson
+        s = '"hello' # missing the trailing "
+        raises(ValueError, "_fastjson.loads(s)")
+        s = r'"hello\n' # valid escape, but still missing the trailing "
+        raises(ValueError, "_fastjson.loads(s)")
+
+    def test_escape_sequence(self):
+        # Each simple escape on its own, then escapes mixed with ordinary
+        # characters before and after them.
+        import _fastjson
+        assert _fastjson.loads(r'"\\"') == u'\\'
+        assert _fastjson.loads(r'"\""') == u'"'
+        assert _fastjson.loads(r'"\/"') == u'/'
+        assert _fastjson.loads(r'"\b"') == u'\b'
+        assert _fastjson.loads(r'"\f"') == u'\f'
+        assert _fastjson.loads(r'"\n"') == u'\n'
+        assert _fastjson.loads(r'"\r"') == u'\r'
+        assert _fastjson.loads(r'"\t"') == u'\t'
+        # plain characters that follow an escape must be copied verbatim
+        assert _fastjson.loads(r'"abc\ndef"') == u'abc\ndef'
+        assert _fastjson.loads(r'"\tx\\y"') == u'\tx\\y'
_______________________________________________
pypy-commit mailing list
[email protected]
http://mail.python.org/mailman/listinfo/pypy-commit