Author: Carl Friedrich Bolz-Tereick <cfb...@gmx.de> Branch: Changeset: r97924:86da6cb357f1 Date: 2019-10-31 21:07 +0100 http://bitbucket.org/pypy/pypy/changeset/86da6cb357f1/
Log: fix #3108: the map based parser didn't deal with json dicts with repeated keys correctly diff --git a/pypy/module/_pypyjson/interp_decoder.py b/pypy/module/_pypyjson/interp_decoder.py --- a/pypy/module/_pypyjson/interp_decoder.py +++ b/pypy/module/_pypyjson/interp_decoder.py @@ -342,7 +342,14 @@ currmap = self.startmap while True: # parse a key: value - currmap = self.decode_key_map(i, currmap) + newmap = self.decode_key_map(i, currmap) + if newmap is None: + # We've seen a repeated key, switch to dict-based storage. + dict_w = self._switch_to_dict(currmap, values_w, nextindex) + # We re-parse the last key, to get the correct overwriting + # effect. Pointless to care for performance here. + return self.decode_object_dict(i, start, dict_w) + currmap = newmap i = self.skip_whitespace(self.pos) ch = self.ll_chars[i] if ch != ':': @@ -610,6 +617,8 @@ """ Given the current map currmap of an object, decode the next key at position i. This returns the new map of the object. """ newmap = self._decode_key_map(i, currmap) + if newmap is None: + return None currmap.observe_transition(newmap, self.startmap) return newmap @@ -789,6 +798,11 @@ self.nextmap_first._check_invariants() def get_next(self, w_key, string, start, stop, terminator): + """ Returns the next map, given a wrapped key w_key, the json input + string with positions start and stop, as well as a terminator. + + Returns None if the key already appears somewhere in the map chain. 
+ """ from pypy.objspace.std.dictmultiobject import unicode_hash, unicode_eq if isinstance(self, JSONMap): assert not self.state == MapBase.BLOCKED @@ -803,6 +817,8 @@ if nextmap_first is None: # first transition ever seen, don't initialize nextmap_all next = self._make_next_map(w_key, string[start:stop]) + if next is None: + return None self.nextmap_first = next else: if self.nextmap_all is None: @@ -817,6 +833,8 @@ # if we are at this point we didn't find the transition yet, so # create a new one next = self._make_next_map(w_key, string[start:stop]) + if next is None: + return None self.nextmap_all[w_key] = next # one new leaf has been created @@ -859,6 +877,14 @@ self.mark_useful(terminator) def _make_next_map(self, w_key, key_repr): + # Check whether w_key is already part of the self.prev chain + # to prevent strangeness in the json dict implementation. + # This is slow, but it should be rare to call this function. + check = self + while isinstance(check, JSONMap): + if check.w_key._utf8 == w_key._utf8: + return None + check = check.prev return JSONMap(self.space, self, w_key, key_repr) def fill_dict(self, dict_w, values_w): diff --git a/pypy/module/_pypyjson/test/test__pypyjson.py b/pypy/module/_pypyjson/test/test__pypyjson.py --- a/pypy/module/_pypyjson/test/test__pypyjson.py +++ b/pypy/module/_pypyjson/test/test__pypyjson.py @@ -74,6 +74,17 @@ m3.fill_dict(d, [space.w_None, space.w_None, space.w_None]) assert list(d) == [w_a, w_b, w_c] + def test_repeated_key_get_next(self): + m = Terminator(self.space) + w_a = self.space.newutf8("a", 1) + w_b = self.space.newutf8("b", 1) + w_c = self.space.newutf8("c", 1) + m1 = m.get_next(w_a, '"a"', 0, 3, m) + m1 = m1.get_next(w_b, '"b"', 0, 3, m) + m1 = m1.get_next(w_c, '"c"', 0, 3, m) + m2 = m1.get_next(w_a, '"a"', 0, 3, m) + assert m2 is None + def test_decode_key_map(self): m = Terminator(self.space) @@ -519,3 +530,11 @@ exc = raises(ValueError, _pypyjson.loads, inputtext) assert str(exc.value) == errmsg + def 
test_repeated_key(self): + import _pypyjson + a = '{"abc": "4", "k": 1, "k": 2}' + d = _pypyjson.loads(a) + assert d == {u"abc": u"4", u"k": 2} + a = '{"abc": "4", "k": 1, "k": 1.5, "c": null, "k": 2}' + d = _pypyjson.loads(a) + assert d == {u"abc": u"4", u"c": None, u"k": 2} _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit