[pypy-commit] pypy faster-json: implement encode enough so it passes tests

fijal Thu, 20 Oct 2011 04:37:54 -0700

Author: Maciej Fijalkowski <[email protected]>
Branch: faster-json
Changeset: r48262:1139520345e7
Date: 2011-10-20 13:37 +0200
http://bitbucket.org/pypy/pypy/changeset/1139520345e7/


Log:    implement encode enough so it passes tests

diff --git a/lib-python/modified-2.7/json/encoder.py b/lib-python/modified-2.7/json/encoder.py
--- a/lib-python/modified-2.7/json/encoder.py
+++ b/lib-python/modified-2.7/json/encoder.py
@@ -3,6 +3,7 @@
 import re
 
 from __pypy__ import identity_dict
+from __pypy__.builders import StringBuilder, UnicodeBuilder
 
 ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
 ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
@@ -30,7 +31,7 @@
     """
     def replace(match):
         return ESCAPE_DCT[match.group(0)]
-    return '"' + ESCAPE.sub(replace, s) + '"'
+    return ESCAPE.sub(replace, s)
 
 def encode_basestring_ascii(s):
     """Return an ASCII-only JSON representation of a Python string
@@ -45,17 +46,17 @@
         except KeyError:
             n = ord(s)
             if n < 0x10000:
-                return '\\u{0:04x}'.format(n)
-                #return '\\u%04x' % (n,)
+                #return '\\u{0:04x}'.format(n)
+                return '\\u%04x' % (n,)
             else:
                 # surrogate pair
                 n -= 0x10000
                 s1 = 0xd800 | ((n >> 10) & 0x3ff)
                 s2 = 0xdc00 | (n & 0x3ff)
-                return '\\u{0:04x}\\u{1:04x}'.format(s1, s2)
-                #return '\\u%04x\\u%04x' % (s1, s2)
-    return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"'
-py_encode_basestring_ascii = encode_basestring_ascii
+                #return '\\u{0:04x}\\u{1:04x}'.format(s1, s2)
+                return '\\u%04x\\u%04x' % (s1, s2)
+    return str(ESCAPE_ASCII.sub(replace, s))
+py_encode_basestring_ascii = lambda s: '"' + encode_basestring_ascii(s) + '"'
 c_encode_basestring_ascii = None
 
 class JSONEncoder(object):
@@ -185,24 +186,126 @@
         '{"foo": ["bar", "baz"]}'
 
         """
-        # This is for extremely simple cases and benchmarks.
+        if self.check_circular:
+            markers = identity_dict()
+        else:
+            markers = None
+        if self.ensure_ascii:
+            builder = StringBuilder()
+        else:
+            builder = UnicodeBuilder()
+        self._encode(o, markers, builder, 0)
+        return builder.build()
+
+    def _emit_indent(self, builder, _current_indent_level):        
+        if self.indent is not None:
+            _current_indent_level += 1
+            newline_indent = '\n' + (' ' * (self.indent *
+                                            _current_indent_level))
+            separator = self.item_separator + newline_indent
+            builder.append(newline_indent)
+        else:
+            separator = self.item_separator
+        return separator, _current_indent_level
+
+    def _emit_unindent(self, builder, _current_indent_level):
+        if self.indent is not None:
+            builder.append('\n')
+            builder.append(' ' * (self.indent * (_current_indent_level - 1)))
+
+    def _encode(self, o, markers, builder, _current_indent_level):
         if isinstance(o, basestring):
-            if isinstance(o, str):
-                _encoding = self.encoding
-                if (_encoding is not None
-                        and not (_encoding == 'utf-8')):
-                    o = o.decode(_encoding)
-            if self.ensure_ascii:
-                return encode_basestring_ascii(o)
+            builder.append('"')
+            builder.append(self.encoder(o))
+            builder.append('"')
+        elif o is None:
+            builder.append('null')
+        elif o is True:
+            builder.append('true')
+        elif o is False:
+            builder.append('false')
+        elif isinstance(o, (int, long)):
+            builder.append(str(o))
+        elif isinstance(o, float):
+            builder.append(self._floatstr(o))
+        elif isinstance(o, (list, tuple)):
+            if not o:
+                builder.append('[]')
+                return
+            self._encode_list(o, markers, builder, _current_indent_level)
+        elif isinstance(o, dict):
+            if not o:
+                builder.append('{}')
+                return
+            self._encode_dict(o, markers, builder, _current_indent_level)
+        else:
+            self._mark_markers(markers, o)
+            res = self.default(o)
+            self._encode(res, markers, builder, _current_indent_level)
+            self._remove_markers(markers, o)
+            return res
+
+    def _encode_list(self, l, markers, builder, _current_indent_level):
+        self._mark_markers(markers, l)
+        builder.append('[')
+        first = True
+        separator, _current_indent_level = self._emit_indent(builder,
+                                                      _current_indent_level)
+        for elem in l:
+            if first:
+                first = False
             else:
-                return encode_basestring(o)
-        # This doesn't pass the iterator directly to ''.join() because the
-        # exceptions aren't as detailed.  The list call should be roughly
-        # equivalent to the PySequence_Fast that ''.join() would do.        
-        chunks = self.iterencode(o, _one_shot=True)
-        if not isinstance(chunks, (list, tuple)):
-            chunks = list(chunks)
-        return ''.join(chunks)
+                builder.append(separator)
+            self._encode(elem, markers, builder, _current_indent_level)
+            del elem # XXX grumble
+        self._emit_unindent(builder, _current_indent_level)
+        builder.append(']')
+        self._remove_markers(markers, l)
+
+    def _encode_dict(self, d, markers, builder, _current_indent_level):
+        self._mark_markers(markers, d)
+        first = True
+        builder.append('{')
+        separator, _current_indent_level = self._emit_indent(builder,
+                                                         _current_indent_level)
+        if self.sort_keys:
+            items = sorted(d.items(), key=lambda kv: kv[0])
+        else:
+            items = d.iteritems()
+
+        for key, v in items:
+            if first:
+                first = False
+            else:
+                builder.append(separator)
+            if isinstance(key, basestring):
+                pass
+            # JavaScript is weakly typed for these, so it makes sense to
+            # also allow them.  Many encoders seem to do something like this.
+            elif isinstance(key, float):
+                key = self._floatstr(key)
+            elif key is True:
+                key = 'true'
+            elif key is False:
+                key = 'false'
+            elif key is None:
+                key = 'null'
+            elif isinstance(key, (int, long)):
+                key = str(key)
+            elif self.skipkeys:
+                continue
+            else:
+                raise TypeError("key " + repr(key) + " is not a string")
+            builder.append('"')
+            builder.append(self.encoder(key))
+            builder.append('"')
+            builder.append(self.key_separator)
+            self._encode(v, markers, builder, _current_indent_level)
+            del key
+            del v # XXX grumble
+        self._emit_unindent(builder, _current_indent_level)
+        builder.append('}')
+        self._remove_markers(markers, d)
 
     def iterencode(self, o, _one_shot=False):
         """Encode the given object and yield each string
@@ -273,7 +376,7 @@
             else:
                 buf = separator
             if isinstance(value, basestring):
-                yield buf + self.encoder(value)
+                yield buf + '"' + self.encoder(value) + '"'
             elif value is None:
                 yield buf + 'null'
             elif value is True:
@@ -346,10 +449,10 @@
                 first = False
             else:
                 yield item_separator
-            yield self.encoder(key)
+            yield '"' + self.encoder(key) + '"'
             yield self.key_separator
             if isinstance(value, basestring):
-                yield self.encoder(value)
+                yield '"' + self.encoder(value) + '"'
             elif value is None:
                 yield 'null'
             elif value is True:
@@ -380,7 +483,7 @@
 
     def _iterencode(self, o, markers, _current_indent_level):
         if isinstance(o, basestring):
-            yield self.encoder(o)
+            yield '"' + self.encoder(o) + '"'
         elif o is None:
             yield 'null'
         elif o is True:
_______________________________________________
pypy-commit mailing list
[email protected]
http://mail.python.org/mailman/listinfo/pypy-commit

[pypy-commit] pypy faster-json: implement encode enough so it passes tests

Reply via email to