[pypy-commit] pypy fastjson: add support for escape sequences and complain if the string is non terminated

antocuni Tue, 04 Jun 2013 08:42:56 -0700

Author: Antonio Cuni <[email protected]>
Branch: fastjson
Changeset: r64770:9e8497c271b6
Date: 2013-06-04 16:20 +0200
http://bitbucket.org/pypy/pypy/changeset/9e8497c271b6/


Log:    add support for escape sequences and complain if the string is not
        terminated

diff --git a/pypy/module/_fastjson/interp_decoder.py b/pypy/module/_fastjson/interp_decoder.py
--- a/pypy/module/_fastjson/interp_decoder.py
+++ b/pypy/module/_fastjson/interp_decoder.py
@@ -1,3 +1,4 @@
+from rpython.rlib.rstring import StringBuilder
 from pypy.interpreter.error import OperationError, operationerrfmt
 from pypy.interpreter.gateway import unwrap_spec
 from pypy.interpreter import unicodehelper
@@ -26,6 +27,9 @@
         self.i += 1
         return ch
 
+    def unget(self):  # move the read position back by one character
+        self.i -= 1
+
     def skip_whitespace(self):
         while not self.eof():
             ch = self.peek()
@@ -42,21 +46,65 @@
         else:
             assert False, 'Unkown char: %s' % ch
 
+    def getslice(self, start, end):  # return the input substring self.s[start:end]
+        assert end > 0  # NOTE(review): presumably lets RPython prove the slice stop is non-negative -- confirm
+        return self.s[start:end]
+
     def decode_string(self):
         self.next()
         start = self.i
-        while True:
+        while not self.eof():
+            # this loop is a fast path for strings which do not contain escape
+            # characters
             ch = self.next()
             if ch == '"':
-                end = self.i-1
-                assert end > 0
-                content = self.s[start:end]
+                content_utf8 = self.getslice(start, self.i-1)
+                content_unicode = unicodehelper.decode_utf8(self.space, content_utf8)
                 self.last_type = TYPE_STRING
-                return self.space.wrap(unicodehelper.decode_utf8(self.space, content))
+                return self.space.wrap(content_unicode)
             elif ch == '\\':
-                raise Exception("escaped strings not supported yet")
+                content_so_far = self.getslice(start, self.i-1)
+                self.unget()
+                return self.decode_string_escaped(start, content_so_far)
+        raise operationerrfmt(self.space.w_ValueError,
+                              "Unterminated string starting at char %d", start)
 
 
+    def decode_string_escaped(self, start, content_so_far):
+        builder = StringBuilder(len(content_so_far)*2) # just an estimate
+        builder.append(content_so_far)
+        while not self.eof():
+            ch = self.next()
+            if ch == '"':
+                content_utf8 = builder.build()
+                content_unicode = unicodehelper.decode_utf8(self.space, 
content_utf8)
+                self.last_type = TYPE_STRING
+                return self.space.wrap(content_unicode)
+            elif ch == '\\':
+                newchar = self.decode_escape_sequence()
+                builder.append_multiple_char(newchar, 1) # we should implement 
append_char
+            else:
+                builder.append_multiple_char(newchar, 1)
+            
+        raise operationerrfmt(self.space.w_ValueError,
+                              "Unterminated string starting at char %d", start)
+
+    def decode_escape_sequence(self):  # translate the character after a backslash into the character it stands for
+        ch = self.next()  # the backslash itself was already consumed by the caller
+        if ch == '\\':  return '\\'
+        elif ch == '"': return '"'
+        elif ch == '/': return '/'
+        elif ch == 'b': return '\b'
+        elif ch == 'f': return '\f'
+        elif ch == 'n': return '\n'
+        elif ch == 'r': return '\r'
+        elif ch == 't': return '\t'
+        elif ch == 'u':
+            assert False, 'not implemented yet'  # TODO: \uXXXX escapes are not handled yet
+        else:
+            raise operationerrfmt(self.space.w_ValueError,
+                                  "Invalid \\escape: %s (char %d)", ch, self.i-1)
+
 @unwrap_spec(s=str)
 def loads(space, s):
     decoder = JSONDecoder(space, s)
diff --git a/pypy/module/_fastjson/test/test__fastjson.py b/pypy/module/_fastjson/test/test__fastjson.py
--- a/pypy/module/_fastjson/test/test__fastjson.py
+++ b/pypy/module/_fastjson/test/test__fastjson.py
@@ -37,3 +37,19 @@
         assert _fastjson.loads(s) == u'hello'
         s = '   "hello"   extra'
         raises(ValueError, "_fastjson.loads(s)")
+
+    def test_unterminated_string(self):
+        import _fastjson
+        s = '"hello' # missing the trailing "
+        raises(ValueError, "_fastjson.loads(s)")
+
+    def test_escape_sequence(self):
+        import _fastjson
+        assert _fastjson.loads(r'"\\"') == u'\\'
+        assert _fastjson.loads(r'"\""') == u'"'
+        assert _fastjson.loads(r'"\/"') == u'/'       
+        assert _fastjson.loads(r'"\b"') == u'\b'
+        assert _fastjson.loads(r'"\f"') == u'\f'
+        assert _fastjson.loads(r'"\n"') == u'\n'
+        assert _fastjson.loads(r'"\r"') == u'\r'
+        assert _fastjson.loads(r'"\t"') == u'\t'
_______________________________________________
pypy-commit mailing list
[email protected]
http://mail.python.org/mailman/listinfo/pypy-commit

[pypy-commit] pypy fastjson: add support for escape sequences and complain if the string is non terminated

Reply via email to