Author: Armin Rigo <[email protected]>
Branch: py3.5
Changeset: r93669:18c3825bad60
Date: 2018-01-15 13:02 +0100
http://bitbucket.org/pypy/pypy/changeset/18c3825bad60/
Log: This is encoder.py from pypy 2.7, with the changes in CPython 2.7--
3.5 manually applied.
diff --git a/lib-python/3/json/encoder.py b/lib-python/3/json/encoder.py
--- a/lib-python/3/json/encoder.py
+++ b/lib-python/3/json/encoder.py
@@ -2,6 +2,8 @@
"""
import re
+from __pypy__.builders import StringBuilder
+
try:
from _json import encode_basestring_ascii as c_encode_basestring_ascii
except ImportError:
@@ -157,6 +159,11 @@
if default is not None:
self.default = default
+        # Pre-compute the string used for one level of indentation.  A
+        # non-string ``indent`` is taken as a count of spaces; a string is
+        # used verbatim; None is stored unchanged.
+        if indent is not None and not isinstance(indent, str):
+            self.indent_str = ' ' * indent
+        else:
+            self.indent_str = indent
+
def default(self, o):
"""Implement this method in a subclass such that it returns
a serializable object for ``o``, or calls the base implementation
@@ -186,19 +193,126 @@
'{"foo": ["bar", "baz"]}'
"""
- # This is for extremely simple cases and benchmarks.
+ if self.check_circular:
+ markers = {}
+ else:
+ markers = None
+ builder = StringBuilder()
+ self.__encode(o, markers, builder, 0)
+ return builder.build()
+
+ def __emit_indent(self, builder, _current_indent_level):
+ # Called right after an opening bracket has been appended.  Works out
+ # the separator to place between the items of the container being
+ # opened.  When self.indent is not None it also appends the newline
+ # plus the indentation of the first item to ``builder``.
+ # Returns (separator, indent level to use for the container's items).
+ if self.indent is not None:
+ _current_indent_level += 1
+ newline_indent = '\n' + self.indent_str * _current_indent_level
+ separator = self.item_separator + newline_indent
+ builder.append(newline_indent)
+ else:
+ separator = self.item_separator
+ return separator, _current_indent_level
+
+ def __emit_unindent(self, builder, _current_indent_level):
+ # Called just before a closing bracket is appended.  When self.indent
+ # is not None, appends a newline followed by the indentation of the
+ # enclosing level (one less than the level passed in); otherwise
+ # appends nothing.
+ if self.indent is not None:
+ builder.append('\n')
+ builder.append(self.indent_str * (_current_indent_level - 1))
+
+ def __encode(self, o, markers, builder, _current_indent_level):
+ # Append the JSON text for ``o`` to ``builder``, dispatching on its type.
+ # ``markers`` is the dict used for circular-reference detection, or None
+ # when that check is disabled.
+ # NOTE(review): self.__encoder is not assigned anywhere in the hunks of
+ # this diff -- confirm that __init__ sets it, and that it returns the
+ # escaped text *without* surrounding quotes, since quotes are added here.
if isinstance(o, str):
- if self.ensure_ascii:
- return encode_basestring_ascii(o)
+ builder.append('"')
+ builder.append(self.__encoder(o))
+ builder.append('"')
+ elif o is None:
+ builder.append('null')
+ elif o is True:
+ builder.append('true')
+ elif o is False:
+ builder.append('false')
+ elif isinstance(o, int):
+ # Subclasses of int/float may override __str__, but we still
+ # want to encode them as integers/floats in JSON. One example
+ # within the standard library is IntEnum.
+ builder.append(int.__str__(o))
+ elif isinstance(o, float):
+ builder.append(self.__floatstr(o))
+ elif isinstance(o, (list, tuple)):
+ # Empty containers are written here; the helper only sees non-empty ones.
+ if not o:
+ builder.append('[]')
+ return
+ self.__encode_list(o, markers, builder, _current_indent_level)
+ elif isinstance(o, dict):
+ if not o:
+ builder.append('{}')
+ return
+ self.__encode_dict(o, markers, builder, _current_indent_level)
+ else:
+ # Any other type: convert it with self.default() and encode the
+ # result, keeping ``o`` marked while that happens so that a default()
+ # which leads back to ``o`` is reported as a circular reference.
+ self.__mark_markers(markers, o)
+ res = self.default(o)
+ self.__encode(res, markers, builder, _current_indent_level)
+ self.__remove_markers(markers, o)
+ # NOTE(review): presumably part of the ``else`` branch (indentation is
+ # lost in this rendering); no caller visible in this diff uses the value.
+ return res
+
+ def __encode_list(self, l, markers, builder, _current_indent_level):
+ # Append the JSON array for the sequence ``l`` to ``builder``.
+ # __encode writes '[]' for empty sequences itself and only calls this
+ # for non-empty ones.  ``l`` stays marked for the duration so that a
+ # sequence containing itself raises ValueError in __mark_markers.
+ self.__mark_markers(markers, l)
+ builder.append('[')
+ first = True
+ separator, _current_indent_level = self.__emit_indent(builder,
+ _current_indent_level)
+ for elem in l:
+ # The separator goes before every element except the first.
+ if first:
+ first = False
else:
- return encode_basestring(o)
- # This doesn't pass the iterator directly to ''.join() because the
- # exceptions aren't as detailed. The list call should be roughly
- # equivalent to the PySequence_Fast that ''.join() would do.
- chunks = self.iterencode(o, _one_shot=True)
- if not isinstance(chunks, (list, tuple)):
- chunks = list(chunks)
- return ''.join(chunks)
+ builder.append(separator)
+ self.__encode(elem, markers, builder, _current_indent_level)
+ del elem # XXX grumble
+ self.__emit_unindent(builder, _current_indent_level)
+ builder.append(']')
+ self.__remove_markers(markers, l)
+
+    def __encode_dict(self, d, markers, builder, _current_indent_level):
+        """Append the JSON object for the mapping ``d`` to ``builder``.
+
+        __encode writes '{}' for empty dicts itself and only calls this
+        for non-empty ones.  str keys are used as is; float, bool, None
+        and int keys are converted to their JSON spelling.  Any other key
+        is skipped when ``self.skipkeys`` is true and raises TypeError
+        otherwise.  Items are emitted in sorted key order when
+        ``self.sort_keys`` is true.  ``d`` stays marked while it is being
+        encoded, so a dict that contains itself raises ValueError.
+        """
+        self.__mark_markers(markers, d)
+        first = True
+        builder.append('{')
+        separator, _current_indent_level = self.__emit_indent(
+            builder, _current_indent_level)
+        if self.sort_keys:
+            items = sorted(d.items(), key=lambda kv: kv[0])
+        else:
+            items = d.items()
+
+        for key, v in items:
+            if isinstance(key, str):
+                pass
+            # JavaScript is weakly typed for these, so it makes sense to
+            # also allow them.  Many encoders seem to do something like this.
+            elif isinstance(key, float):
+                key = self.__floatstr(key)
+            elif key is True:
+                key = 'true'
+            elif key is False:
+                key = 'false'
+            elif key is None:
+                key = 'null'
+            elif isinstance(key, int):
+                # see comment for int in __encode
+                key = int.__str__(key)
+            elif self.skipkeys:
+                continue
+            else:
+                raise TypeError("key " + repr(key) + " is not a string")
+            # Emit the separator only once the key is known to be kept.
+            # Doing it before the checks above would leave a stray (or
+            # leading) separator behind every key dropped by skipkeys and
+            # produce invalid JSON.  __iterencode_dict uses this order too.
+            if first:
+                first = False
+            else:
+                builder.append(separator)
+            builder.append('"')
+            builder.append(self.__encoder(key))
+            builder.append('"')
+            builder.append(self.key_separator)
+            self.__encode(v, markers, builder, _current_indent_level)
+            del key
+            del v # XXX grumble
+        self.__emit_unindent(builder, _current_indent_level)
+        builder.append('}')
+        self.__remove_markers(markers, d)
def iterencode(self, o, _one_shot=False):
"""Encode the given object and yield each string
@@ -214,83 +328,53 @@
markers = {}
else:
markers = None
- if self.ensure_ascii:
- _encoder = encode_basestring_ascii
+ return self.__iterencode(o, markers, 0)
+
+ def __floatstr(self, o):
+ # Return the JSON text for the float ``o``.  NaN and +/-INFINITY are
+ # spelled 'NaN', 'Infinity' and '-Infinity' (or rejected with ValueError
+ # when self.allow_nan is false); every other value uses float.__repr__.
+ # Check for specials. Note that this type of test is processor
+ # and/or platform-specific, so do tests which don't depend on the
+ # internals.
+
+ if o != o:
+ text = 'NaN'
+ elif o == INFINITY:
+ text = 'Infinity'
+ elif o == -INFINITY:
+ text = '-Infinity'
else:
- _encoder = encode_basestring
+ return float.__repr__(o)
- def floatstr(o, allow_nan=self.allow_nan,
- _repr=float.__repr__, _inf=INFINITY, _neginf=-INFINITY):
- # Check for specials. Note that this type of test is processor
- # and/or platform-specific, so do tests which don't depend on the
- # internals.
+ # Only the three special values reach this point.
+ if not self.allow_nan:
+ raise ValueError(
+ "Out of range float values are not JSON compliant: " +
+ repr(o))
- if o != o:
- text = 'NaN'
- elif o == _inf:
- text = 'Infinity'
- elif o == _neginf:
- text = '-Infinity'
- else:
- return _repr(o)
+ return text
- if not allow_nan:
- raise ValueError(
- "Out of range float values are not JSON compliant: " +
- repr(o))
+ def __mark_markers(self, markers, o):
+ # Record ``o`` (keyed by id) as currently being encoded.  Raises
+ # ValueError if it is already recorded, i.e. the object is reachable
+ # from itself.  Does nothing when ``markers`` is None.
+ if markers is not None:
+ if id(o) in markers:
+ raise ValueError("Circular reference detected")
+ markers[id(o)] = None
- return text
+ def __remove_markers(self, markers, o):
+ # Forget ``o`` once its encoding has finished; the counterpart of
+ # __mark_markers.  Does nothing when ``markers`` is None.
+ if markers is not None:
+ del markers[id(o)]
-
- if (_one_shot and c_make_encoder is not None
- and self.indent is None):
- _iterencode = c_make_encoder(
- markers, self.default, _encoder, self.indent,
- self.key_separator, self.item_separator, self.sort_keys,
- self.skipkeys, self.allow_nan)
- else:
- _iterencode = _make_iterencode(
- markers, self.default, _encoder, self.indent, floatstr,
- self.key_separator, self.item_separator, self.sort_keys,
- self.skipkeys, _one_shot)
- return _iterencode(o, 0)
-
-def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
- _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
- ## HACK: hand-optimized bytecode; turn globals into locals
- ValueError=ValueError,
- dict=dict,
- float=float,
- id=id,
- int=int,
- isinstance=isinstance,
- list=list,
- str=str,
- tuple=tuple,
- _intstr=int.__str__,
- ):
-
- if _indent is not None and not isinstance(_indent, str):
- _indent = ' ' * _indent
-
- def _iterencode_list(lst, _current_indent_level):
+ def __iterencode_list(self, lst, markers, _current_indent_level):
if not lst:
yield '[]'
return
- if markers is not None:
- markerid = id(lst)
- if markerid in markers:
- raise ValueError("Circular reference detected")
- markers[markerid] = lst
+ self.__mark_markers(markers, lst)
buf = '['
- if _indent is not None:
+ if self.indent is not None:
_current_indent_level += 1
- newline_indent = '\n' + _indent * _current_indent_level
- separator = _item_separator + newline_indent
+ newline_indent = '\n' + self.indent_str * _current_indent_level
+ separator = self.item_separator + newline_indent
buf += newline_indent
else:
newline_indent = None
- separator = _item_separator
+ separator = self.item_separator
first = True
for value in lst:
if first:
@@ -298,7 +382,7 @@
else:
buf = separator
if isinstance(value, str):
- yield buf + _encoder(value)
+ yield buf + '"' + self.__encoder(value) + '"'
elif value is None:
yield buf + 'null'
elif value is True:
@@ -306,49 +390,44 @@
elif value is False:
yield buf + 'false'
elif isinstance(value, int):
- # Subclasses of int/float may override __str__, but we still
- # want to encode them as integers/floats in JSON. One example
- # within the standard library is IntEnum.
- yield buf + _intstr(value)
+ # see comment for int in __encode
+ yield buf + int.__str__(value)
elif isinstance(value, float):
- # see comment above for int
- yield buf + _floatstr(value)
+ yield buf + self.__floatstr(value)
else:
yield buf
if isinstance(value, (list, tuple)):
- chunks = _iterencode_list(value, _current_indent_level)
+ chunks = self.__iterencode_list(value, markers,
+ _current_indent_level)
elif isinstance(value, dict):
- chunks = _iterencode_dict(value, _current_indent_level)
+ chunks = self.__iterencode_dict(value, markers,
+ _current_indent_level)
else:
- chunks = _iterencode(value, _current_indent_level)
+ chunks = self.__iterencode(value, markers,
+ _current_indent_level)
yield from chunks
if newline_indent is not None:
_current_indent_level -= 1
- yield '\n' + _indent * _current_indent_level
+ yield '\n' + self.indent_str * _current_indent_level
yield ']'
- if markers is not None:
- del markers[markerid]
+ self.__remove_markers(markers, lst)
- def _iterencode_dict(dct, _current_indent_level):
+ def __iterencode_dict(self, dct, markers, _current_indent_level):
if not dct:
yield '{}'
return
- if markers is not None:
- markerid = id(dct)
- if markerid in markers:
- raise ValueError("Circular reference detected")
- markers[markerid] = dct
+ self.__mark_markers(markers, dct)
yield '{'
- if _indent is not None:
+ if self.indent is not None:
_current_indent_level += 1
- newline_indent = '\n' + _indent * _current_indent_level
- item_separator = _item_separator + newline_indent
+ newline_indent = '\n' + self.indent_str * _current_indent_level
+ item_separator = self.item_separator + newline_indent
yield newline_indent
else:
newline_indent = None
- item_separator = _item_separator
+ item_separator = self.item_separator
first = True
- if _sort_keys:
+ if self.sort_keys:
items = sorted(dct.items(), key=lambda kv: kv[0])
else:
items = dct.items()
@@ -358,8 +437,7 @@
# JavaScript is weakly typed for these, so it makes sense to
# also allow them. Many encoders seem to do something like this.
elif isinstance(key, float):
- # see comment for int/float in _make_iterencode
- key = _floatstr(key)
+ key = self.__floatstr(key)
elif key is True:
key = 'true'
elif key is False:
@@ -367,9 +445,9 @@
elif key is None:
key = 'null'
elif isinstance(key, int):
- # see comment for int/float in _make_iterencode
- key = _intstr(key)
- elif _skipkeys:
+ # see comment for int in __encode
+ key = int.__str__(key)
+ elif self.skipkeys:
continue
else:
raise TypeError("key " + repr(key) + " is not a string")
@@ -377,64 +455,68 @@
first = False
else:
yield item_separator
- yield _encoder(key)
- yield _key_separator
+ yield '"' + self.__encoder(key) + '"'
+ yield self.key_separator
if isinstance(value, str):
- yield _encoder(value)
+ yield '"' + self.__encoder(value) + '"'
elif value is None:
yield 'null'
elif value is True:
yield 'true'
elif value is False:
yield 'false'
- elif isinstance(value, int):
- # see comment for int/float in _make_iterencode
- yield _intstr(value)
+            elif isinstance(value, int):
+                # ``long`` does not exist on Python 3, so naming it here
+                # raised NameError.  int.__str__ (rather than str) keeps int
+                # subclasses that override __str__, e.g. IntEnum, encoded as
+                # plain integers.
+                yield int.__str__(value)
elif isinstance(value, float):
- # see comment for int/float in _make_iterencode
- yield _floatstr(value)
+ yield self.__floatstr(value)
else:
if isinstance(value, (list, tuple)):
- chunks = _iterencode_list(value, _current_indent_level)
+ chunks = self.__iterencode_list(value, markers,
+ _current_indent_level)
elif isinstance(value, dict):
- chunks = _iterencode_dict(value, _current_indent_level)
+ chunks = self.__iterencode_dict(value, markers,
+ _current_indent_level)
else:
- chunks = _iterencode(value, _current_indent_level)
+ chunks = self.__iterencode(value, markers,
+ _current_indent_level)
yield from chunks
if newline_indent is not None:
_current_indent_level -= 1
- yield '\n' + _indent * _current_indent_level
+ yield '\n' + self.indent_str * _current_indent_level
yield '}'
- if markers is not None:
- del markers[markerid]
+ self.__remove_markers(markers, dct)
- def _iterencode(o, _current_indent_level):
+ def __iterencode(self, o, markers, _current_indent_level):
if isinstance(o, str):
- yield _encoder(o)
+ yield '"' + self.__encoder(o) + '"'
elif o is None:
yield 'null'
elif o is True:
yield 'true'
elif o is False:
yield 'false'
- elif isinstance(o, int):
- # see comment for int/float in _make_iterencode
- yield _intstr(o)
+        elif isinstance(o, int):
+            # ``long`` does not exist on Python 3, so naming it here raised
+            # NameError.  int.__str__ (rather than str) keeps int subclasses
+            # that override __str__, e.g. IntEnum, encoded as plain integers.
+            yield int.__str__(o)
elif isinstance(o, float):
- # see comment for int/float in _make_iterencode
- yield _floatstr(o)
+ yield self.__floatstr(o)
elif isinstance(o, (list, tuple)):
- yield from _iterencode_list(o, _current_indent_level)
+ yield from self.__iterencode_list(o, markers,
_current_indent_level)
elif isinstance(o, dict):
- yield from _iterencode_dict(o, _current_indent_level)
+ yield from self.__iterencode_dict(o, markers,
_current_indent_level)
else:
- if markers is not None:
- markerid = id(o)
- if markerid in markers:
- raise ValueError("Circular reference detected")
- markers[markerid] = o
- o = _default(o)
- yield from _iterencode(o, _current_indent_level)
- if markers is not None:
- del markers[markerid]
- return _iterencode
+ self.__mark_markers(markers, o)
+ obj = self.default(o)
+ yield from self.__iterencode(obj, markers, _current_indent_level)
+ self.__remove_markers(markers, o)
+
+
+# overwrite some helpers here with more efficient versions
+try:
+ from _pypyjson import raw_encode_basestring_ascii
+ def encode_basestring_ascii(s):
+ # Thin wrapper around the _pypyjson helper.  A None result is handled
+ # by quoting ``s`` unchanged (presumably None signals that nothing
+ # needed escaping -- TODO confirm against _pypyjson).
+ # NOTE(review): the value returned here already carries the quotes,
+ # while __encode/__iterencode add their own quotes around
+ # self.__encoder(...); confirm self.__encoder is never this function.
+ encoded = raw_encode_basestring_ascii(s)
+ if encoded is None:
+ return '"' + s + '"'
+ return encoded # on pypy3, includes the quotes already
+except ImportError:
+ # _pypyjson is unavailable: leave the module's existing helpers as they are.
+ pass
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit