[pypy-commit] pypy default: merge json-decoder-maps:

cfbolz Sat, 21 Sep 2019 12:24:36 -0700

Author: Carl Friedrich Bolz-Tereick <cfb...@gmx.de>
Branch: 
Changeset: r97581:89a3715662ef
Date: 2019-09-21 21:06 +0200
http://bitbucket.org/pypy/pypy/changeset/89a3715662ef/


Log:    merge json-decoder-maps:

        implement a much faster json decoder (3x speedup for large json
        files, 2x less memory) used techniques:

        - SWAR (SIMD within a register) techniques for finding the end of
        whitespace and the end of strings
        - cache strings aggressively
        - use maps for representing the resulting objects.

diff too long, truncating to 2000 out of 2102 lines

diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst
--- a/pypy/doc/whatsnew-head.rst
+++ b/pypy/doc/whatsnew-head.rst
@@ -5,3 +5,9 @@
 .. this is a revision shortly after release-pypy-7.2.0
 .. startrev: 78cd4acbcbec 
 
+
+.. branch: json-decoder-maps
+
+Much faster and more memory-efficient JSON decoding. The resulting
+dictionaries that come out of the JSON decoder have faster lookups too.
+
diff --git a/pypy/module/_pypyjson/interp_decoder.py 
b/pypy/module/_pypyjson/interp_decoder.py
--- a/pypy/module/_pypyjson/interp_decoder.py
+++ b/pypy/module/_pypyjson/interp_decoder.py
@@ -1,11 +1,13 @@
 import sys
 from rpython.rlib.rstring import StringBuilder
-from rpython.rlib.objectmodel import specialize, always_inline, r_dict
-from rpython.rlib import rfloat, rutf8
+from rpython.rlib.objectmodel import specialize, always_inline
+from rpython.rlib import rfloat, runicode, jit, objectmodel, rutf8
 from rpython.rtyper.lltypesystem import lltype, rffi
 from rpython.rlib.rarithmetic import r_uint
 from pypy.interpreter.error import oefmt
 from pypy.interpreter import unicodehelper
+from pypy.interpreter.baseobjspace import W_Root
+from pypy.module._pypyjson import simd
 
 OVF_DIGITS = len(str(sys.maxint))
 
@@ -15,45 +17,107 @@
 # precomputing negative powers of 10 is MUCH faster than using e.g. math.pow
 # at runtime
 NEG_POW_10 = [10.0**-i for i in range(16)]
+del i
+
 def neg_pow_10(x, exp):
     if exp >= len(NEG_POW_10):
         return 0.0
     return x * NEG_POW_10[exp]
 
-def slice_eq(a, b):
-    (ll_chars1, start1, length1, _) = a
-    (ll_chars2, start2, length2, _) = b
-    if length1 != length2:
-        return False
-    j = start2
-    for i in range(start1, start1 + length1):
-        if ll_chars1[i] != ll_chars2[j]:
-            return False
-        j += 1
-    return True
 
-def slice_hash(a):
-    (ll_chars, start, length, h) = a
-    return h
+class IntCache(object):
+    """ A cache for wrapped ints between START and END """
 
-TYPE_UNKNOWN = 0
-TYPE_STRING = 1
-class JSONDecoder(object):
+    # I also tried various combinations of having an LRU cache for ints as
+    # well, didn't really help.
+
+    # XXX one thing to do would be to use withintprebuilt in general again,
+    # hidden behind a 'we_are_jitted'
+
+    START = -10
+    END = 256
+
+    def __init__(self, space):
+        self.space = space
+        self.cache = [self.space.newint(i)
+                for i in range(self.START, self.END)]
+
+    def newint(self, intval):
+        if self.START <= intval < self.END:
+            return self.cache[intval - self.START]
+        return self.space.newint(intval)
+
+
+class JSONDecoder(W_Root):
+
+    LRU_SIZE = 16
+    LRU_MASK = LRU_SIZE - 1
+
+    DEFAULT_SIZE_SCRATCH = 20
+
+    # string caching is only used if the total size of the message is larger
+    # than a megabyte. Below that, there can't be that many repeated big
+    # strings anyway (some experiments showed this to be a reasonable cutoff
+    # size)
+    MIN_SIZE_FOR_STRING_CACHE = 1024 * 1024
+
+    # evaluate the string cache for 200 strings, before looking at the hit rate
+    # and deciding whether to keep doing it
+    STRING_CACHE_EVALUATION_SIZE = 200
+
+    # keep using the string cache if at least 25% of all decoded strings are a
+    # hit in the cache
+    STRING_CACHE_USEFULNESS_FACTOR = 4
+
+
     def __init__(self, space, s):
         self.space = space
+        self.w_empty_string = space.newutf8("", 0)
+
         self.s = s
+
         # we put our string in a raw buffer so:
         # 1) we automatically get the '\0' sentinel at the end of the string,
         #    which means that we never have to check for the "end of string"
         # 2) we can pass the buffer directly to strtod
-        self.ll_chars = rffi.str2charp(s)
+        self.ll_chars, self.llobj, self.flag = 
rffi.get_nonmovingbuffer_ll_final_null(self.s)
         self.end_ptr = lltype.malloc(rffi.CCHARPP.TO, 1, flavor='raw')
         self.pos = 0
-        self.cache = r_dict(slice_eq, slice_hash, simple_hash_eq=True)
+        self.intcache = space.fromcache(IntCache)
+
+        # two caches, one for keys, one for general strings. they both have the
+        # form {hash-as-int: StringCacheEntry} and they don't deal with
+        # collisions at all. For every hash there is simply one string stored
+        # and we ignore collisions.
+        self.cache_keys = {}
+        self.cache_values = {}
+
+        # we don't cache *all* non-key strings, that would be too expensive.
+        # instead, keep a cache of the last 16 strings hashes around and add a
+        # string to the cache only if its hash is seen a second time
+        self.lru_cache = [0] * self.LRU_SIZE
+        self.lru_index = 0
+
+        self.startmap = self.space.fromcache(Terminator)
+
+        # keep a list of objects that are created with maps that aren't clearly
+        # useful. If they turn out to be useful in the end we are good,
+        # otherwise convert them to dicts (see .close())
+        self.unclear_objects = []
+
+        # this is a freelist of lists that store the decoded value of an
+        # object, before they get copied into the eventual dict
+        self.scratch = [[None] * self.DEFAULT_SIZE_SCRATCH]
+
 
     def close(self):
-        rffi.free_charp(self.ll_chars)
+        rffi.free_nonmovingbuffer_ll(self.ll_chars, self.llobj, self.flag)
         lltype.free(self.end_ptr, flavor='raw')
+        # clean up objects that are instances of now blocked maps
+        for w_obj in self.unclear_objects:
+            jsonmap = self._get_jsonmap_from_dict(w_obj)
+            if jsonmap.is_state_blocked():
+                self._devolve_jsonmap_dict(w_obj)
 
     def getslice(self, start, end):
         assert start >= 0
@@ -61,23 +125,22 @@
         return self.s[start:end]
 
     def skip_whitespace(self, i):
+        ll_chars = self.ll_chars
         while True:
-            ch = self.ll_chars[i]
+            ch = ll_chars[i]
             if is_whitespace(ch):
-                i+=1
+                i += 1
             else:
                 break
         return i
 
-    @specialize.arg(1)
-    def _raise(self, msg, *args):
-        raise oefmt(self.space.w_ValueError, msg, *args)
-
-    def decode_any(self, i):
+    def decode_any(self, i, contextmap=None):
+        """ Decode an object at position i. Optionally pass a contextmap, if
+        the value is decoded as the value of a dict. """
         i = self.skip_whitespace(i)
         ch = self.ll_chars[i]
         if ch == '"':
-            return self.decode_string(i+1)
+            return self.decode_string(i+1, contextmap)
         elif ch == '[':
             return self.decode_array(i+1)
         elif ch == '{':
@@ -102,6 +165,11 @@
             self._raise("No JSON object could be decoded: unexpected '%s' at 
char %d",
                         ch, i)
 
+
+    @specialize.arg(1)
+    def _raise(self, msg, *args):
+        raise oefmt(self.space.w_ValueError, msg, *args)
+
     def decode_null(self, i):
         if (self.ll_chars[i]   == 'u' and
             self.ll_chars[i+1] == 'l' and
@@ -162,7 +230,7 @@
             return self.decode_int_slow(start)
 
         self.pos = i
-        return self.space.newint(intval)
+        return self.intcache.newint(intval)
 
     def decode_float(self, i):
         from rpython.rlib import rdtoa
@@ -214,7 +282,22 @@
         ovf_maybe = (count >= OVF_DIGITS)
         return i, ovf_maybe, sign * intval
 
+    def _raise_control_char_in_string(self, ch, startindex, currindex):
+        if ch == '\0':
+            self._raise("Unterminated string starting at char %d",
+                        startindex - 1)
+        else:
+            self._raise("Invalid control character at char %d", currindex-1)
+
+    def _raise_object_error(self, ch, start, i):
+        if ch == '\0':
+            self._raise("Unterminated object starting at char %d", start)
+        else:
+            self._raise("Unexpected '%s' when decoding object (char %d)",
+                        ch, i)
+
     def decode_array(self, i):
+        """ Decode a list. i must be after the opening '[' """
         w_list = self.space.newlist([])
         start = i
         i = self.skip_whitespace(start)
@@ -248,62 +331,116 @@
             self.pos = i+1
             return self.space.newdict()
 
-        d = self._create_empty_dict()
+        if self.scratch:
+            values_w = self.scratch.pop()
+        else:
+            values_w = [None] * self.DEFAULT_SIZE_SCRATCH
+        nextindex = 0
+        currmap = self.startmap
         while True:
             # parse a key: value
-            w_name = self.decode_key(i)
+            currmap = self.decode_key_map(i, currmap)
             i = self.skip_whitespace(self.pos)
             ch = self.ll_chars[i]
             if ch != ':':
                 self._raise("No ':' found at char %d", i)
             i += 1
-            i = self.skip_whitespace(i)
-            #
-            w_value = self.decode_any(i)
-            d[w_name] = w_value
+
+            w_value = self.decode_any(i, currmap)
+
+            if nextindex == len(values_w):  # full
+                values_w = values_w + [None] * len(values_w)  # double
+            values_w[nextindex] = w_value
+            nextindex += 1
             i = self.skip_whitespace(self.pos)
             ch = self.ll_chars[i]
             i += 1
             if ch == '}':
                 self.pos = i
-                return self._create_dict(d)
+                self.scratch.append(values_w)  # can reuse next time
+                if currmap.is_state_blocked():
+                    dict_w = self._switch_to_dict(currmap, values_w, nextindex)
+                    return self._create_dict(dict_w)
+                values_w = values_w[:nextindex]
+                w_res = self._create_dict_map(values_w, currmap)
+                if not currmap.is_state_useful():
+                    self.unclear_objects.append(w_res)
+                return w_res
             elif ch == ',':
-                pass
-            elif ch == '\0':
-                self._raise("Unterminated object starting at char %d", start)
+                i = self.skip_whitespace(i)
+                if currmap.is_state_blocked():
+                    self.scratch.append(values_w)  # can reuse next time
+                    dict_w = self._switch_to_dict(currmap, values_w, nextindex)
+                    return self.decode_object_dict(i, start, dict_w)
             else:
-                self._raise("Unexpected '%s' when decoding object (char %d)",
-                            ch, i-1)
+                self._raise_object_error(ch, start, i - 1)
 
-    def decode_string(self, i):
-        start = i
-        bits = 0
+    def _create_dict_map(self, values_w, jsonmap):
+        from pypy.objspace.std.jsondict import from_values_and_jsonmap
+        return from_values_and_jsonmap(self.space, values_w, jsonmap)
+
+    def _devolve_jsonmap_dict(self, w_dict):
+        from pypy.objspace.std.jsondict import devolve_jsonmap_dict
+        devolve_jsonmap_dict(w_dict)
+
+    def _get_jsonmap_from_dict(self, w_dict):
+        from pypy.objspace.std.jsondict import get_jsonmap_from_dict
+        return get_jsonmap_from_dict(w_dict)
+
+    def _switch_to_dict(self, currmap, values_w, nextindex):
+        dict_w = self._create_empty_dict()
+        currmap.fill_dict(dict_w, values_w)
+        assert len(dict_w) == nextindex
+        return dict_w
+
+    def decode_object_dict(self, i, start, dict_w):
         while True:
-            # this loop is a fast path for strings which do not contain escape
-            # characters
+            # parse a key: value
+            w_key = self.decode_key_string(i)
+            i = self.skip_whitespace(self.pos)
+            ch = self.ll_chars[i]
+            if ch != ':':
+                self._raise("No ':' found at char %d", i)
+            i += 1
+
+            w_value = self.decode_any(i)
+            dict_w[w_key] = w_value
+            i = self.skip_whitespace(self.pos)
             ch = self.ll_chars[i]
             i += 1
-            bits |= ord(ch)
-            if ch == '"':
+            if ch == '}':
                 self.pos = i
-                return self._create_string(start, i - 1, bits)
-            elif ch == '\\' or ch < '\x20':
-                self.pos = i-1
-                return self.decode_string_escaped(start)
+                return self._create_dict(dict_w)
+            elif ch == ',':
+                i = self.skip_whitespace(i)
+            else:
+                self._raise_object_error(ch, start, i - 1)
 
-    def _create_string(self, start, end, bits):
-        if bits & 0x80:
-            # the 8th bit is set, it's an utf8 string
-            content_utf8 = self.getslice(start, end)
+    def decode_string_uncached(self, i):
+        start = i
+        ll_chars = self.ll_chars
+        nonascii, i = simd.find_end_of_string_no_hash(ll_chars, i, len(self.s))
+        ch = ll_chars[i]
+        if ch == '\\':
+            self.pos = i
+            return self.decode_string_escaped(start, nonascii)
+        if ch < '\x20':
+            self._raise_control_char_in_string(ch, start, i)
+        else:
+            assert ch == '"'
+
+        self.pos = i + 1
+        return self._create_string_wrapped(start, i, nonascii)
+
+    def _create_string_wrapped(self, start, end, nonascii):
+        content = self.getslice(start, end)
+        if nonascii:
+            # contains non-ascii chars, we need to check that it's valid utf-8
             lgt = unicodehelper.check_utf8_or_raise(self.space,
-                                                          content_utf8)
-            return self.space.newutf8(content_utf8, lgt)
+                                                          content)
         else:
-            # ascii only, fast path (ascii is a strict subset of
-            # latin1, and we already checked that all the chars are <
-            # 128)
-            return self.space.newutf8(self.getslice(start, end),
-                                      end - start)
+            lgt = end - start
+        return self.space.newutf8(content, lgt)
 
     def _create_dict(self, d):
         from pypy.objspace.std.dictmultiobject import from_unicode_key_dict
@@ -313,8 +450,7 @@
         from pypy.objspace.std.dictmultiobject import 
create_empty_unicode_key_dict
         return create_empty_unicode_key_dict(self.space)
 
-
-    def decode_string_escaped(self, start):
+    def decode_string_escaped(self, start, nonascii):
         i = self.pos
         builder = StringBuilder((i - start) * 2) # just an estimate
         assert start >= 0
@@ -325,25 +461,21 @@
             i += 1
             if ch == '"':
                 content_utf8 = builder.build()
-                lgt = unicodehelper.check_utf8_or_raise(self.space,
+                length = unicodehelper.check_utf8_or_raise(self.space,
                                                            content_utf8)
                 self.pos = i
-                return self.space.newutf8(content_utf8, lgt)
+                return self.space.newutf8(content_utf8, length)
             elif ch == '\\':
-                i = self.decode_escape_sequence(i, builder)
+                i = self.decode_escape_sequence_to_utf8(i, builder)
             elif ch < '\x20':
-                if ch == '\0':
-                    self._raise("Unterminated string starting at char %d",
-                                start - 1)
-                else:
-                    self._raise("Invalid control character at char %d", i-1)
+                self._raise_control_char_in_string(ch, start, i)
             else:
                 builder.append(ch)
 
-    def decode_escape_sequence(self, i, builder):
+    def decode_escape_sequence_to_utf8(self, i, stringbuilder):
         ch = self.ll_chars[i]
         i += 1
-        put = builder.append
+        put = stringbuilder.append
         if ch == '\\':  put('\\')
         elif ch == '"': put('"' )
         elif ch == '/': put('/' )
@@ -353,22 +485,37 @@
         elif ch == 'r': put('\r')
         elif ch == 't': put('\t')
         elif ch == 'u':
-            return self.decode_escape_sequence_unicode(i, builder)
+            # may be a surrogate pair
+            return self.decode_escape_sequence_unicode(i, stringbuilder)
         else:
             self._raise("Invalid \\escape: %s (char %d)", ch, i-1)
         return i
 
+    def _get_int_val_from_hex4(self, i):
+        ll_chars = self.ll_chars
+        res = 0
+        for i in range(i, i + 4):
+            ch = ord(ll_chars[i])
+            if ord('a') <= ch <= ord('f'):
+                digit = ch - ord('a') + 10
+            elif ord('A') <= ch <= ord('F'):
+                digit = ch - ord('A') + 10
+            elif ord('0') <= ch <= ord('9'):
+                digit = ch - ord('0')
+            else:
+                raise ValueError
+            res = (res << 4) + digit
+        return res
+
     def decode_escape_sequence_unicode(self, i, builder):
         # at this point we are just after the 'u' of the \u1234 sequence.
         start = i
         i += 4
-        hexdigits = self.getslice(start, i)
         try:
-            val = int(hexdigits, 16)
+            val = self._get_int_val_from_hex4(start)
             if (0xd800 <= val <= 0xdbff and
                     self.ll_chars[i] == '\\' and self.ll_chars[i+1] == 'u'):
-                hexdigits = self.getslice(i+2, i+6)
-                lowsurr = int(hexdigits, 16)
+                lowsurr = self._get_int_val_from_hex4(i + 2)
                 if 0xdc00 <= lowsurr <= 0xdfff:
                     # decode surrogate pair
                     val = 0x10000 + (((val - 0xd800) << 10) |
@@ -383,45 +530,618 @@
         builder.append(utf8_ch)
         return i
 
-    def decode_key(self, i):
-        """ returns a wrapped unicode """
-        from rpython.rlib.rarithmetic import intmask
 
-        i = self.skip_whitespace(i)
+    def decode_string(self, i, contextmap=None):
+        """ Decode a string at position i (which is right after the opening ").
+        Optionally pass a contextmap, if the value is decoded as the value of a
+        dict."""
+
+        ll_chars = self.ll_chars
+        start = i
+        ch = ll_chars[i]
+        if ch == '"':
+            self.pos = i + 1
+            return self.w_empty_string # surprisingly common
+
+        cache = True
+        if contextmap is not None:
+            # keep some statistics about the usefulness of the string cache on
+            # the contextmap
+            # the intuition about the contextmap is as follows:
+            # often there are string values stored in dictionaries that can
+            # never be usefully cached, like unique ids of objects. Then the
+            # strings *in those fields* of all objects should never be cached.
+            # However, the content of other fields can still be useful to
+            # cache.
+            contextmap.decoded_strings += 1
+            if not contextmap.should_cache_strings():
+                cache = False
+        if len(self.s) < self.MIN_SIZE_FOR_STRING_CACHE:
+            cache = False
+
+        if not cache:
+            return self.decode_string_uncached(i)
+
+        strhash, nonascii, i = simd.find_end_of_string(ll_chars, i, 
len(self.s))
+        ch = ll_chars[i]
+        if ch == '\\':
+            self.pos = i
+            return self.decode_string_escaped(start, nonascii)
+        if ch < '\x20':
+            self._raise_control_char_in_string(ch, start, i)
+        else:
+            assert ch == '"'
+
+        self.pos = i + 1
+
+        length = i - start
+        strhash ^= length
+
+        # check cache first:
+        try:
+            entry = self.cache_values[strhash]
+        except KeyError:
+            w_res = self._create_string_wrapped(start, i, nonascii)
+            # only add *some* strings to the cache, because keeping them all is
+            # way too expensive. first we check if the contextmap has caching
+            # disabled completely. if not, we check whether we have recently
+            # seen the same hash already, if yes, we cache the string.
+            if ((contextmap is not None and
+                        contextmap.decoded_strings < 
self.STRING_CACHE_EVALUATION_SIZE) or
+                    strhash in self.lru_cache):
+                entry = StringCacheEntry(
+                        self.getslice(start, start + length), w_res)
+                self.cache_values[strhash] = entry
+            else:
+                self.lru_cache[self.lru_index] = strhash
+                self.lru_index = (self.lru_index + 1) & self.LRU_MASK
+            return w_res
+        if not entry.compare(ll_chars, start, length):
+            # collision! hopefully rare
+            return self._create_string_wrapped(start, i, nonascii)
+        if contextmap is not None:
+            contextmap.cache_hits += 1
+        return entry.w_uni
+
+    def decode_key_map(self, i, currmap):
+        """ Given the current map currmap of an object, decode the next key at
+        position i. This returns the new map of the object. """
+        newmap = self._decode_key_map(i, currmap)
+        currmap.observe_transition(newmap, self.startmap)
+        return newmap
+
+    def _decode_key_map(self, i, currmap):
+        ll_chars = self.ll_chars
+        # first try to see whether we happen to find currmap.nextmap_first
+        nextmap = currmap.fast_path_key_parse(self, i)
+        if nextmap is not None:
+            return nextmap
+
+        start = i
+        ch = ll_chars[i]
+        if ch != '"':
+            self._raise("Key name must be string at char %d", i)
+        i += 1
+        w_key = self._decode_key_string(i)
+        return currmap.get_next(w_key, self.s, start, self.pos, self.startmap)
+
+    def _decode_key_string(self, i):
+        """ decode key at position i as a string. Key strings are always
+        cached, since they repeat a lot. """
+        ll_chars = self.ll_chars
+        start = i
+
+        strhash, nonascii, i = simd.find_end_of_string(ll_chars, i, 
len(self.s))
+
+        ch = ll_chars[i]
+        if ch == '\\':
+            self.pos = i
+            w_key = self.decode_string_escaped(start, nonascii)
+            return w_key
+        if ch < '\x20':
+            self._raise_control_char_in_string(ch, start, i)
+        length = i - start
+        strhash ^= length
+        self.pos = i + 1
+        # check cache first:
+        try:
+            entry = self.cache_keys[strhash]
+        except KeyError:
+            w_res = self._create_string_wrapped(start, i, nonascii)
+            entry = StringCacheEntry(
+                    self.getslice(start, start + length), w_res)
+            self.cache_keys[strhash] = entry
+            return w_res
+        if not entry.compare(ll_chars, start, length):
+            # collision! hopefully rare
+            w_res = self._create_string_wrapped(start, i, nonascii)
+        else:
+            w_res = entry.w_uni
+        return w_res
+
+    def decode_key_string(self, i):
         ll_chars = self.ll_chars
         ch = ll_chars[i]
         if ch != '"':
             self._raise("Key name must be string at char %d", i)
         i += 1
+        return self._decode_key_string(i)
 
-        start = i
-        bits = 0
-        strhash = ord(ll_chars[i]) << 7
-        while True:
-            ch = ll_chars[i]
+
+class StringCacheEntry(object):
+    """ A cache entry, bundling the encoded version of a string as it appears
+    in the input string, and its wrapped decoded variant. """
+    def __init__(self, repr, w_uni):
+        # repr is the escaped string
+        self.repr = repr
+        # uni is the wrapped decoded string
+        self.w_uni = w_uni
+
+    def compare(self, ll_chars, start, length):
+        """ Check whether self.repr occurs at ll_chars[start:start+length] """
+        if length != len(self.repr):
+            return False
+        index = start
+        for c in self.repr:
+            if not ll_chars[index] == c:
+                return False
+            index += 1
+        return True
+
+
+class MapBase(object):
+    """ A map implementation to speed up parsing of json dicts, and to
+    represent the resulting dicts more compactly and make access faster. """
+
+    # the basic problem we are trying to solve is the following: dicts in
+    # json can either be used as objects, or as dictionaries with arbitrary
+    # string keys. We want to use maps for the former, but not for the
+    # latter. But we don't know in advance which kind of dict is which.
+
+    # Therefore we create "preliminary" maps where we aren't quite sure yet
+    # whether they are really useful maps or not. If we see them used often
+    # enough, we promote them to "useful" maps, which we will actually
+    # instantiate objects with.
+
+    # If we determine that a map is not used often enough, we can turn it
+    # into a "blocked" map, which is a point in the map tree where we will
+    # switch to regular dicts, when we reach that part of the tree.
+
+    # One added complication: We want to keep the number of preliminary maps
+    # bounded to prevent generating tons of useless maps. but also not too
+    # small, to support having a json file that contains many uniform objects
+    # with tons of keys. That's where the idea of "fringe" maps comes into
+    # play. They are maps that sit between known useful nodes and preliminary
+    # nodes in the map transition tree. We bound only the number of fringe
+    # nodes we are considering (to MAX_FRINGE), but not the number of
+    # preliminary maps. When we have too many fringe maps, we remove the least
+    # commonly instantiated fringe map and mark it as blocked.
+
+    # allowed graph edges or nodes in nextmap_all:
+    #    USEFUL -------
+    #   /      \       \
+    #  v        v       v
+    # FRINGE   USEFUL   BLOCKED
+    #  |
+    #  v
+    # PRELIMINARY
+    #  |
+    #  v
+    # PRELIMINARY
+
+    # state transitions:
+    #   PRELIMINARY
+    #   /   |       \
+    #   |   v        v
+    #   | FRINGE -> USEFUL
+    #   |   |
+    #   \   |
+    #    v  v
+    #   BLOCKED
+
+    # the nextmap_first edge can only be these graph edges:
+    #  USEFUL
+    #   |
+    #   v
+    #  USEFUL
+    #
+    #  FRINGE
+    #   |
+    #   v
+    #  PRELIMINARY
+    #   |
+    #   v
+    #  PRELIMINARY
+
+    USEFUL = 'u'
+    PRELIMINARY = 'p'
+    FRINGE = 'f' # buffer between PRELIMINARY and USEFUL
+    BLOCKED = 'b'
+
+    # tunable parameters
+    MAX_FRINGE = 40
+    USEFUL_THRESHOLD = 5
+
+    def __init__(self, space):
+        self.space = space
+
+        # a single transition is stored in .nextmap_first
+        self.nextmap_first = None
+
+        # nextmap_all is only initialized after seeing the *second* transition
+        # but then it also contains .nextmap_first
+        self.nextmap_all = None # later dict {key: nextmap}
+
+        # keep some statistics about every map: how often it was instantiated
+        # and how many non-blocked leaves the map transition tree has, starting
+        # from self
+        self.instantiation_count = 0
+        self.number_of_leaves = 1
+
+    def _check_invariants(self):
+        if self.nextmap_all:
+            for next in self.nextmap_all.itervalues():
+                next._check_invariants()
+        elif self.nextmap_first:
+            self.nextmap_first._check_invariants()
+
+    def get_next(self, w_key, string, start, stop, terminator):
+        from pypy.objspace.std.dictmultiobject import unicode_hash, unicode_eq
+        if isinstance(self, JSONMap):
+            assert not self.state == MapBase.BLOCKED
+        nextmap_first = self.nextmap_first
+        if (nextmap_first is not None and
+                nextmap_first.w_key.eq_w(w_key)):
+            return nextmap_first
+
+        assert stop >= 0
+        assert start >= 0
+
+        if nextmap_first is None:
+            # first transition ever seen, don't initialize nextmap_all
+            next = self._make_next_map(w_key, string[start:stop])
+            self.nextmap_first = next
+        else:
+            if self.nextmap_all is None:
+                # 2nd transition ever seen
+                self.nextmap_all = objectmodel.r_dict(unicode_eq, unicode_hash,
+                  force_non_null=True, simple_hash_eq=True)
+                self.nextmap_all[nextmap_first.w_key] = nextmap_first
+            else:
+                next = self.nextmap_all.get(w_key, None)
+                if next is not None:
+                    return next
+            # if we are at this point we didn't find the transition yet, so
+            # create a new one
+            next = self._make_next_map(w_key, string[start:stop])
+            self.nextmap_all[w_key] = next
+
+            # one new leaf has been created
+            self.change_number_of_leaves(1)
+
+        terminator.register_potential_fringe(next)
+        return next
+
+    def change_number_of_leaves(self, difference):
+        """ add difference to .number_of_leaves of self and its parents """
+        if not difference:
+            return
+        parent = self
+        while isinstance(parent, JSONMap):
+            parent.number_of_leaves += difference
+            parent = parent.prev
+        parent.number_of_leaves += difference # terminator
+
+    def fast_path_key_parse(self, decoder, position):
+        """ Fast path when parsing the next key: We speculate that we will
+        always see a commonly seen next key, and use strcmp (implemented in
+        key_repr_cmp) to check whether that is the case. """
+        nextmap_first = self.nextmap_first
+        if nextmap_first:
+            ll_chars = decoder.ll_chars
+            assert isinstance(nextmap_first, JSONMap)
+            if nextmap_first.key_repr_cmp(ll_chars, position):
+                decoder.pos = position + len(nextmap_first.key_repr)
+                return nextmap_first
+        return None
+
+    def observe_transition(self, newmap, terminator):
+        """ observe a transition from self to newmap.
+        This does a few things, including updating the self size estimate with
+        the knowledge that one object transitioned from self to newmap.
+        also it potentially decides that self should move to state USEFUL."""
+        newmap.instantiation_count += 1
+        if isinstance(self, JSONMap) and self.state == MapBase.FRINGE:
+            if self.is_useful():
+                self.mark_useful(terminator)
+
+    def _make_next_map(self, w_key, key_repr):
+        return JSONMap(self.space, self, w_key, key_repr)
+
+    def fill_dict(self, dict_w, values_w):
+        """ recursively fill the dictionary dict_w in the correct order,
+        reading from values_w."""
+        raise NotImplementedError("abstract base")
+
+    def _all_dot(self, output):
+        identity = objectmodel.compute_unique_id(self)
+        output.append('%s [shape=box%s];' % (identity, self._get_dot_text()))
+        if self.nextmap_all:
+            for w_key, value in self.nextmap_all.items():
+                assert isinstance(value, JSONMap)
+                if value is self.nextmap_first:
+                    color = ", color=blue"
+                else:
+                    color = ""
+                output.append('%s -> %s [label="%s"%s];' % (
+                    identity, objectmodel.compute_unique_id(value), 
value.w_key._utf8, color))
+                value._all_dot(output)
+        elif self.nextmap_first is not None:
+            value = self.nextmap_first
+            output.append('%s -> %s [label="%s", color=blue];' % (
+                identity, objectmodel.compute_unique_id(value), 
value.w_key._utf8))
+            value._all_dot(output)
+
+
+    def _get_dot_text(self):
+        return ", label=base"
+
+    def view(self):
+        from dotviewer import graphclient
+        import pytest
+        r = ["digraph G {"]
+        self._all_dot(r)
+        r.append("}")
+        p = pytest.ensuretemp("jsonmap").join("temp.dot")
+        p.write("\n".join(r))
+        graphclient.display_dot_file(str(p))
+
+
+class Terminator(MapBase):
+    """ The root node of the map transition tree. """
+    def __init__(self, space):
+        MapBase.__init__(self, space)
+        # a set of all map nodes that are currently in the FRINGE state
+        self.current_fringe = {}
+
+    def register_potential_fringe(self, prelim):
+        """ add prelim to the fringe, if its prev is either a Terminator or
+        useful. """
+        prev = prelim.prev
+        if (isinstance(prev, Terminator) or
+                isinstance(prev, JSONMap) and prev.state == MapBase.USEFUL):
+            assert prelim.state == MapBase.PRELIMINARY
+            prelim.state = MapBase.FRINGE
+
+            if len(self.current_fringe) > MapBase.MAX_FRINGE:
+                self.cleanup_fringe()
+            self.current_fringe[prelim] = None
+
+    def remove_from_fringe(self, former_fringe):
+        """ Remove former_fringe from self.current_fringe. """
+        assert former_fringe.state in (MapBase.USEFUL, MapBase.BLOCKED)
+        del self.current_fringe[former_fringe]
+
+    def cleanup_fringe(self):
+        """ remove the least-instantiated fringe map and block it."""
+        min_fringe = None
+        min_avg = 1e200
+        for f in self.current_fringe:
+            assert f.state == MapBase.FRINGE
+            avg = f.average_instantiation()
+            if avg < min_avg:
+                min_avg = avg
+                min_fringe = f
+        assert min_fringe
+        min_fringe.mark_blocked(self)
+
+    def fill_dict(self, dict_w, values_w):
+        """ recursively fill the dictionary dict_w in the correct order,
+        reading from values_w."""
+        return 0
+
+    def _check_invariants(self):
+        for fringe in self.current_fringe:
+            assert fringe.state == MapBase.FRINGE
+
+class JSONMap(MapBase):
+    """ A map implementation to speed up parsing """
+
+    def __init__(self, space, prev, w_key, key_repr):
+        MapBase.__init__(self, space)
+
+        self.prev = prev
+        self.w_key = w_key
+        self.key_repr = key_repr
+
+        self.state = MapBase.PRELIMINARY
+
+        # key decoding stats
+        self.decoded_strings = 0
+        self.cache_hits = 0
+
+        # for jsondict support
+        self.key_to_index = None
+        self.keys_in_order = None
+        self.strategy_instance = None
+
+    def __repr__(self):
+        return "<JSONMap key_repr=%s #instantiation=%s #leaves=%s prev=%r>" % (
+                self.key_repr, self.instantiation_count, 
self.number_of_leaves, self.prev)
+
+    def _get_terminator(self): # only for _check_invariants
+        while isinstance(self, JSONMap):
+            self = self.prev
+        assert isinstance(self, Terminator)
+        return self
+
+    def _check_invariants(self):
+        assert self.state in (
+            MapBase.USEFUL,
+            MapBase.PRELIMINARY,
+            MapBase.FRINGE,
+            MapBase.BLOCKED,
+        )
+
+        prev = self.prev
+        if isinstance(prev, JSONMap):
+            prevstate = prev.state
+        else:
+            prevstate = MapBase.USEFUL
+
+        if prevstate == MapBase.USEFUL:
+            assert self.state != MapBase.PRELIMINARY
+        elif prevstate == MapBase.PRELIMINARY:
+            assert self.state == MapBase.PRELIMINARY
+        elif prevstate == MapBase.FRINGE:
+            assert self.state == MapBase.PRELIMINARY
+        else:
+            # if prevstate is BLOCKED, we shouldn't have recursed here!
+            assert False, "should be unreachable"
+
+        if self.state == MapBase.BLOCKED:
+            assert self.nextmap_first is None
+            assert self.nextmap_all is None
+        elif self.state == MapBase.FRINGE:
+            assert self in self._get_terminator().current_fringe
+
+        MapBase._check_invariants(self)
+
+    def mark_useful(self, terminator):
+        """ mark self as useful, and also the most commonly instantiated
+        children, recursively """
+        was_fringe = self.state == MapBase.FRINGE
+        assert self.state in (MapBase.FRINGE, MapBase.PRELIMINARY)
+        self.state = MapBase.USEFUL
+        if was_fringe:
+            terminator.remove_from_fringe(self)
+        # find the most commonly instantiated child, store it into
+        # nextmap_first and mark it useful, recursively
+        maxchild = self.nextmap_first
+        if self.nextmap_all is not None:
+            for child in self.nextmap_all.itervalues():
+                if child.instantiation_count > maxchild.instantiation_count:
+                    maxchild = child
+        if maxchild is not None:
+            maxchild.mark_useful(terminator)
+            if self.nextmap_all:
+                for child in self.nextmap_all.itervalues():
+                    if child is not maxchild:
+                        terminator.register_potential_fringe(child)
+                self.nextmap_first = maxchild
+
+    def mark_blocked(self, terminator):
+        """ mark self and recursively all its children as blocked."""
+        was_fringe = self.state == MapBase.FRINGE
+        self.state = MapBase.BLOCKED
+        if was_fringe:
+            terminator.remove_from_fringe(self)
+        if self.nextmap_all:
+            for next in self.nextmap_all.itervalues():
+                next.mark_blocked(terminator)
+        elif self.nextmap_first:
+            self.nextmap_first.mark_blocked(terminator)
+        self.nextmap_first = None
+        self.nextmap_all = None
+        self.change_number_of_leaves(-self.number_of_leaves + 1)
+
+    def is_state_blocked(self):
+        return self.state == MapBase.BLOCKED
+
+    def is_state_useful(self):
+        return self.state == MapBase.USEFUL
+
+    def average_instantiation(self):
+        """ the number of instantiations, divided by the number of leaves. We
+        want to favor nodes that have either a high instantiation count, or few
+        leaves below it. """
+        return self.instantiation_count / float(self.number_of_leaves)
+
+    def is_useful(self):
+        return self.average_instantiation() > self.USEFUL_THRESHOLD
+
+    def should_cache_strings(self):
+        """ return whether strings parsed in the context of this map should be
+        cached. """
+        # we should cache if either we've seen few strings so far (less than
+        # STRING_CACHE_EVALUATION_SIZE), or if we've seen many, and the cache
+        # hit rate has been high enough
+        return not (self.decoded_strings > 
JSONDecoder.STRING_CACHE_EVALUATION_SIZE and
+                self.cache_hits * JSONDecoder.STRING_CACHE_USEFULNESS_FACTOR < 
self.decoded_strings)
+
+    def key_repr_cmp(self, ll_chars, i):
+        # XXX should we use "real" memcmp (here in particular, and in other
+        # places in RPython in general)?
+        for j, c in enumerate(self.key_repr):
+            if ll_chars[i] != c:
+                return False
             i += 1
-            if ch == '"':
-                break
-            elif ch == '\\' or ch < '\x20':
-                self.pos = i-1
-                return self.decode_string_escaped(start)
-            strhash = intmask((1000003 * strhash) ^ ord(ll_chars[i]))
-            bits |= ord(ch)
-        length = i - start - 1
-        if length == 0:
-            strhash = -1
+        return True
+
+    def fill_dict(self, dict_w, values_w):
+        index = self.prev.fill_dict(dict_w, values_w)
+        dict_w[self.w_key] = values_w[index]
+        return index + 1
+
+    # _____________________________________________________
+    # methods for JsonDictStrategy
+
+    @jit.elidable
+    def get_index(self, w_key):
+        from pypy.objspace.std.unicodeobject import W_UnicodeObject
+        assert isinstance(w_key, W_UnicodeObject)
+        return self.get_key_to_index().get(w_key, -1)
+
+    def get_key_to_index(self):
+        from pypy.objspace.std.dictmultiobject import unicode_hash, unicode_eq
+        key_to_index = self.key_to_index
+        if key_to_index is None:
+            key_to_index = self.key_to_index = objectmodel.r_dict(unicode_eq, 
unicode_hash,
+                  force_non_null=True, simple_hash_eq=True)
+            # compute depth
+            curr = self
+            depth = 0
+            while True:
+                depth += 1
+                curr = curr.prev
+                if not isinstance(curr, JSONMap):
+                    break
+
+            curr = self
+            while depth:
+                depth -= 1
+                key_to_index[curr.w_key] = depth
+                curr = curr.prev
+                if not isinstance(curr, JSONMap):
+                    break
+        return key_to_index
+
+    def get_keys_in_order(self):
+        keys_in_order = self.keys_in_order
+        if keys_in_order is None:
+            key_to_index = self.get_key_to_index()
+            keys_in_order = self.keys_in_order = [None] * len(key_to_index)
+            for w_key, index in key_to_index.iteritems():
+                keys_in_order[index] = w_key
+        return keys_in_order
+
+    # _____________________________________________________
+
+    def _get_dot_text(self):
+        if self.nextmap_all is None:
+            l = int(self.nextmap_first is not None)
         else:
-            strhash ^= length
-            strhash = intmask(strhash)
-        self.pos = i
-        # check cache first:
-        key = (ll_chars, start, length, strhash)
-        try:
-            return self.cache[key]
-        except KeyError:
-            pass
-        res = self._create_string(start, i - 1, bits)
-        self.cache[key] = res
+            l = len(self.nextmap_all)
+        extra = ""
+        if self.decoded_strings:
+            extra = "\\n%s/%s (%s%%)" % (self.cache_hits, 
self.decoded_strings, self.cache_hits/float(self.decoded_strings))
+        res = ', label="#%s\\nchildren: %s%s"' % (self.instantiation_count, l, 
extra)
+        if self.state == MapBase.BLOCKED:
+            res += ", fillcolor=lightsalmon"
+        if self.state == MapBase.FRINGE:
+            res += ", fillcolor=lightgray"
+        if self.state == MapBase.PRELIMINARY:
+            res += ", fillcolor=lightslategray"
         return res
 
 
@@ -442,3 +1162,4 @@
         return w_res
     finally:
         decoder.close()
+
diff --git a/pypy/module/_pypyjson/simd.py b/pypy/module/_pypyjson/simd.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/_pypyjson/simd.py
@@ -0,0 +1,225 @@
+from rpython.rtyper.lltypesystem import lltype, rffi
+from rpython.rlib import objectmodel, unroll
+from rpython.rlib.rarithmetic import r_uint, intmask, LONG_BIT
+from rpython.jit.backend.detect_cpu import autodetect
+
+# accelerators for string operations using simd on regular word sizes (*not*
+# SSE instructions). this style is sometimes called SWAR (SIMD Within A
+# Register) or "broadword techniques"
+
+# XXX remove wordsize and endianness restrictions properly, so far only x86-64
+# is tested
+
+USE_SIMD = False
+if LONG_BIT == 64:
+    WORD_SIZE = 8
+    EVERY_BYTE_ONE = 0x0101010101010101
+    EVERY_BYTE_HIGHEST_BIT = 0x8080808080808080
+    if autodetect() == "x86-64":
+        USE_SIMD = True
+else:
+    WORD_SIZE = 4
+    EVERY_BYTE_ONE = 0x01010101
+    EVERY_BYTE_HIGHEST_BIT = 0x80808080
+
+
+# helpers
+
+unrolling_wordsize = unroll.unrolling_iterable(range(WORD_SIZE))
+
+def char_repeated_word_width(ch):
+    return r_uint(EVERY_BYTE_ONE) * ord(ch)
+
+def any_char_zero(word):
+    return (word - r_uint(EVERY_BYTE_ONE)) & ~word & 
r_uint(EVERY_BYTE_HIGHEST_BIT)
+
+def any_char_in_words_zero(*words):
+    return _any_char_in_any_word_zero_accum(0, *words)
+
+def _any_char_in_any_word_zero_accum(accum, word, *words):
+    accum |= (word - r_uint(EVERY_BYTE_ONE)) & ~word
+    if not words:
+        return accum & r_uint(EVERY_BYTE_HIGHEST_BIT)
+    return _any_char_in_any_word_zero_accum(accum, *words)
+
+def print_chars(word):
+    # for debugging
+    out = ''
+    for i in range(WORD_SIZE):
+        out += chr(word & 0xff)
+        word >>= 8
+    return out
+
+def index_nonzero(word):
+    # XXX can be done very cheap in theory
+    assert word
+    for i in unrolling_wordsize:
+        if word & 0xff:
+            return i
+        word >>= 8
+    assert 0
+
+def index_zero(word):
+    # XXX can be done very cheap in theory
+    assert any_char_zero(word)
+    for i in unrolling_wordsize:
+        if not word & 0xff:
+            return i
+        word >>= 8
+    assert 0 # XXX ???
+
+def set_high_bytes_to_zero(word, keep_bytes):
+    mask = ((~r_uint(0)) << (8 * keep_bytes))
+    return word & ~mask
+
+
+
+@objectmodel.always_inline
+def position_string_ender(word):
+    maskquote = char_repeated_word_width('"')
+    maskbackslash = char_repeated_word_width('\\')
+    maskx20 = char_repeated_word_width(chr(0xff - 0x1f))
+    # x1 and x2 check for equality, if a byte is 0 the corresponding
+    # char is equal to " or \
+    x1 = maskquote ^ word
+    x2 = maskbackslash ^ word
+    # x3 checks for char < 0x20, the byte is 0 in that case
+    x3 = maskx20 & word
+    return any_char_in_words_zero(x1, x2, x3)
+
+@objectmodel.always_inline
+def find_end_of_string_simd_unaligned(ll_chars, startpos, length):
+    ch = ll_chars[startpos]
+    strhash = (ord(ch) << 7) ^ 0x345678
+
+    wordarray = rffi.cast(rffi.ULONGP, rffi.ptradd(ll_chars, startpos))
+    num_safe_reads = (length - startpos) // WORD_SIZE
+
+    bits = 0
+    for i in range(num_safe_reads):
+        word = wordarray[i]
+        cond = position_string_ender(word)
+        if cond:
+            break
+        bits |= word
+        strhash = intmask((1000003 * strhash) ^ intmask(word))
+    else:
+        # didn't find end of string yet, look at remaining chars
+        word = 0
+        shift = 0
+        i = startpos + num_safe_reads * WORD_SIZE
+        while True:   # this loop should run at most WORD_SIZE times,
+                      # if we assume that ll_chars[length] == '\x00'
+            ch = ll_chars[i]
+            if ch == '"' or ch == '\\' or ch < '\x20':
+                break
+            i += 1
+            bits |= ord(ch)
+            word |= ord(ch) << shift
+            shift += 8
+        if shift:
+            strhash = intmask((1000003 * strhash) ^ intmask(word))
+
+        nonascii = bool(bits & char_repeated_word_width(chr(0x80)))
+        return strhash, nonascii, i
+
+    # compute endposition
+    nonzero = index_nonzero(cond)
+    endposition = startpos + i * WORD_SIZE + nonzero
+    if nonzero:
+        word = set_high_bytes_to_zero(word, nonzero)
+        bits |= word
+        strhash = intmask((1000003 * strhash) ^ intmask(word))
+
+    nonascii = bool(bits & char_repeated_word_width(chr(0x80)))
+
+    return strhash, nonascii, endposition
+
+@objectmodel.always_inline
+def find_end_of_string_simd_unaligned_no_hash(ll_chars, startpos, length):
+    ch = ll_chars[startpos]
+
+    wordarray = rffi.cast(rffi.ULONGP, rffi.ptradd(ll_chars, startpos))
+    num_safe_reads = (length - startpos) // WORD_SIZE
+
+    bits = 0
+    for i in range(num_safe_reads):
+        word = wordarray[i]
+        cond = position_string_ender(word)
+        if cond:
+            break
+        bits |= word
+    else:
+        # didn't find end of string yet, look at remaining chars
+        i = startpos + num_safe_reads * WORD_SIZE
+        while True:   # this loop should run at most WORD_SIZE times,
+                      # if we assume that ll_chars[length] == '\x00'
+            ch = ll_chars[i]
+            if ch == '"' or ch == '\\' or ch < '\x20':
+                break
+            i += 1
+            bits |= ord(ch)
+
+        nonascii = bool(bits & char_repeated_word_width(chr(0x80)))
+        return nonascii, i
+
+    # compute endposition
+    nonzero = index_nonzero(cond)
+    endposition = startpos + i * WORD_SIZE + nonzero
+    if nonzero:
+        word = set_high_bytes_to_zero(word, nonzero)
+        bits |= word
+
+    nonascii = bool(bits & char_repeated_word_width(chr(0x80)))
+
+    return nonascii, endposition
+
+
+@objectmodel.always_inline
+def find_end_of_string_slow(ll_chars, i, length):
+    ch = ll_chars[i]
+    strhash = (ord(ch) << 7) ^ 0x345678
+    word = 0
+    shift = 0
+
+    bits = 0
+
+    while True:
+        # this loop is a fast path for strings which do not contain escape
+        # characters
+        ch = ll_chars[i]
+        if ch == '"' or ch == '\\' or ch < '\x20':
+            break
+        i += 1
+        bits |= ord(ch)
+
+        word |= ord(ch) << shift
+        shift += 8
+        if shift == WORD_SIZE * 8:
+            strhash = intmask((1000003 * strhash) ^ word)
+            shift = 0
+            word = 0
+
+    if shift:
+        strhash = intmask((1000003 * strhash) ^ word)
+    return strhash, bool(bits & 0x80), i
+
+@objectmodel.always_inline
+def find_end_of_string_slow_no_hash(ll_chars, i, length):
+    bits = 0
+    while True:
+        # this loop is a fast path for strings which do not contain escape
+        # characters
+        ch = ll_chars[i]
+        if ch == '"' or ch == '\\' or ch < '\x20':
+            break
+        i += 1
+        bits |= ord(ch)
+    return bool(bits & 0x80), i
+
+if USE_SIMD:
+    find_end_of_string = find_end_of_string_simd_unaligned
+    find_end_of_string_no_hash = find_end_of_string_simd_unaligned_no_hash
+else:
+    find_end_of_string = find_end_of_string_slow
+    find_end_of_string_no_hash = find_end_of_string_slow_no_hash
diff --git a/pypy/module/_pypyjson/test/test__pypyjson.py 
b/pypy/module/_pypyjson/test/test__pypyjson.py
--- a/pypy/module/_pypyjson/test/test__pypyjson.py
+++ b/pypy/module/_pypyjson/test/test__pypyjson.py
@@ -1,31 +1,266 @@
 # -*- encoding: utf-8 -*-
-from pypy.module._pypyjson.interp_decoder import JSONDecoder
+import pytest
+from pypy.module._pypyjson.interp_decoder import JSONDecoder, Terminator, 
MapBase
+from rpython.rtyper.lltypesystem import lltype, rffi
 
-def test_skip_whitespace():
-    s = '   hello   '
-    dec = JSONDecoder('fake space', s)
-    assert dec.pos == 0
-    assert dec.skip_whitespace(0) == 3
-    assert dec.skip_whitespace(3) == 3
-    assert dec.skip_whitespace(8) == len(s)
-    dec.close()
 
-class FakeSpace(object):
-    def newutf8(self, s, l):
-        return s
+class TestJson(object):
+    def test_skip_whitespace(self):
+        s = '   hello   '
+        dec = JSONDecoder(self.space, s)
+        assert dec.pos == 0
+        assert dec.skip_whitespace(0) == 3
+        assert dec.skip_whitespace(3) == 3
+        assert dec.skip_whitespace(8) == len(s)
+        dec.close()
 
-def test_decode_key():
-    s1 = "123" * 100
-    s = ' "%s"   "%s" ' % (s1, s1)
-    dec = JSONDecoder(FakeSpace(), s)
-    assert dec.pos == 0
-    x = dec.decode_key(0)
-    assert x == s1
-    # check caching
-    y = dec.decode_key(dec.pos)
-    assert y == s1
-    assert y is x
-    dec.close()
+    def test_json_map(self):
+        m = Terminator(self.space)
+        w_a = self.space.newutf8("a", 1)
+        w_b = self.space.newutf8("b", 1)
+        w_c = self.space.newutf8("c", 1)
+        m1 = m.get_next(w_a, '"a"', 0, 3, m)
+        assert m1.w_key == w_a
+        assert m1.nextmap_first is None
+        assert m1.key_repr == '"a"'
+        assert m1.key_repr_cmp('"a": 123', 0)
+        assert not m1.key_repr_cmp('b": 123', 0)
+        assert m.nextmap_first.w_key == w_a
+
+        m2 = m.get_next(w_a, '"a"', 0, 3, m)
+        assert m2 is m1
+
+        m3 = m.get_next(w_b, '"b"', 0, 3, m)
+        assert m3.w_key == w_b
+        assert m3.nextmap_first is None
+        assert m3.key_repr == '"b"'
+        assert m.nextmap_first is m1
+
+        m4 = m3.get_next(w_c, '"c"', 0, 3, m)
+        assert m4.w_key == w_c
+        assert m4.nextmap_first is None
+        assert m4.key_repr == '"c"'
+        assert m3.nextmap_first is m4
+
+    def test_json_map_get_index(self):
+        m = Terminator(self.space)
+        w_a = self.space.newutf8("a", 1)
+        w_b = self.space.newutf8("b", 1)
+        w_c = self.space.newutf8("c", 1)
+        m1 = m.get_next(w_a, 'a"', 0, 2, m)
+        assert m1.get_index(w_a) == 0
+        assert m1.get_index(w_b) == -1
+
+        m2 = m.get_next(w_b, 'b"', 0, 2, m)
+        assert m2.get_index(w_b) == 0
+        assert m2.get_index(w_a) == -1
+
+        m3 = m2.get_next(w_c, 'c"', 0, 2, m)
+        assert m3.get_index(w_b) == 0
+        assert m3.get_index(w_c) == 1
+        assert m3.get_index(w_a) == -1
+
+    def test_jsonmap_fill_dict(self):
+        from collections import OrderedDict
+        m = Terminator(self.space)
+        space = self.space
+        w_a = space.newutf8("a", 1)
+        w_b = space.newutf8("b", 1)
+        w_c = space.newutf8("c", 1)
+        m1 = m.get_next(w_a, 'a"', 0, 2, m)
+        m2 = m1.get_next(w_b, 'b"', 0, 2, m)
+        m3 = m2.get_next(w_c, 'c"', 0, 2, m)
+        d = OrderedDict()
+        m3.fill_dict(d, [space.w_None, space.w_None, space.w_None])
+        assert list(d) == [w_a, w_b, w_c]
+
+
+    def test_decode_key_map(self):
+        m = Terminator(self.space)
+        m_diff = Terminator(self.space)
+        for s1 in ["abc", "1001" * 10, u"&#228;".encode("utf-8")]:
+            s = ' "%s"   "%s" "%s"' % (s1, s1, s1)
+            dec = JSONDecoder(self.space, s)
+            assert dec.pos == 0
+            m1 = dec.decode_key_map(dec.skip_whitespace(0), m)
+            assert m1.w_key._utf8 == s1
+            assert m1.key_repr == '"%s"' % s1
+
+            # check caching on w_key level
+            m2 = dec.decode_key_map(dec.skip_whitespace(dec.pos), m_diff)
+            assert m1.w_key is m2.w_key
+
+            # check caching on map level
+            m3 = dec.decode_key_map(dec.skip_whitespace(dec.pos), m_diff)
+            assert m3 is m2
+            dec.close()
+
+    def test_decode_string_caching(self):
+        for s1 in ["abc", u"&#228;".encode("utf-8")]:
+            s = '"%s"   "%s"    "%s"' % (s1, s1, s1)
+            dec = JSONDecoder(self.space, s)
+            dec.MIN_SIZE_FOR_STRING_CACHE = 0
+            assert dec.pos == 0
+            w_x = dec.decode_string(1)
+            w_y = dec.decode_string(dec.skip_whitespace(dec.pos) + 1)
+            assert w_x is not w_y
+            # check caching
+            w_z = dec.decode_string(dec.skip_whitespace(dec.pos) + 1)
+            assert w_z is w_y
+            dec.close()
+
+    def _make_some_maps(self):
+        # base -> m1 -> m2 -> m3
+        #                \-> m4
+        w_a = self.space.newutf8("a", 1)
+        w_b = self.space.newutf8("b", 1)
+        w_c = self.space.newutf8("c", 1)
+        w_d = self.space.newutf8("d", 1)
+        base = Terminator(self.space)
+        base.instantiation_count = 6
+        m1 = base.get_next(w_a, 'a"', 0, 2, base)
+        m2 = m1.get_next(w_b, 'b"', 0, 2, base)
+        m3 = m2.get_next(w_c, 'c"', 0, 2, base)
+        m4 = m2.get_next(w_d, 'd"', 0, 2, base)
+        return base, m1, m2, m3, m4
+
+    # unit tests for map state transistions
+    def test_fringe_to_useful(self):
+        base, m1, m2, m3, m4 = self._make_some_maps()
+        base.instantiation_count = 6
+        assert m1.state == MapBase.FRINGE
+        m1.instantiation_count = 6
+
+        assert m2.state == MapBase.PRELIMINARY
+        m2.instantiation_count = 6
+
+        assert m3.state == MapBase.PRELIMINARY
+        m3.instantiation_count = 2
+        assert m2.nextmap_first is m3
+
+        assert m4.state == MapBase.PRELIMINARY
+        m4.instantiation_count = 4
+
+        m1.mark_useful(base)
+        assert m1.state == MapBase.USEFUL
+        assert m2.state == MapBase.USEFUL
+        assert m3.state == MapBase.FRINGE
+        assert m4.state == MapBase.USEFUL
+        assert m2.nextmap_first is m4
+
+        assert m1.number_of_leaves == 2
+        base._check_invariants()
+
+    def test_number_of_leaves(self):
+        w_x = self.space.newutf8("x", 1)
+        base, m1, m2, m3, m4 = self._make_some_maps()
+        assert base.number_of_leaves == 2
+        assert m1.number_of_leaves == 2
+        assert m2.number_of_leaves == 2
+        assert m3.number_of_leaves == 1
+        assert m4.number_of_leaves == 1
+        m5 = m2.get_next(w_x, 'x"', 0, 2, base)
+        assert base.number_of_leaves == 3
+        assert m1.number_of_leaves == 3
+        assert m2.number_of_leaves == 3
+        assert m5.number_of_leaves == 1
+
+    def test_number_of_leaves_after_mark_blocked(self):
+        w_x = self.space.newutf8("x", 1)
+        base, m1, m2, m3, m4 = self._make_some_maps()
+        m5 = m2.get_next(w_x, 'x"', 0, 2, base)
+        assert base.number_of_leaves == 3
+        m2.mark_blocked(base)
+        assert base.number_of_leaves == 1
+
+    def test_mark_useful_cleans_fringe(self):
+        base, m1, m2, m3, m4 = self._make_some_maps()
+        base.instantiation_count = 6
+        assert m1.state == MapBase.FRINGE
+        m1.instantiation_count = 6
+        m2.instantiation_count = 6
+        m3.instantiation_count = 2
+        m4.instantiation_count = 4
+        assert base.current_fringe == {m1: None}
+
+        m1.mark_useful(base)
+        assert base.current_fringe == {m3: None}
+
+    def test_cleanup_fringe(self):
+        w_a = self.space.newutf8("a", 1)
+        w_b = self.space.newutf8("b", 1)
+        w_c = self.space.newutf8("c", 1)
+        w_d = self.space.newutf8("d", 1)
+        base = Terminator(self.space)
+        base.instantiation_count = 6
+        m1 = base.get_next(w_a, 'a"', 0, 2, base)
+        m2 = base.get_next(w_b, 'b"', 0, 2, base)
+        m3 = base.get_next(w_c, 'c"', 0, 2, base)
+        m4 = base.get_next(w_d, 'd"', 0, 2, base)
+        m5 = m4.get_next(w_a, 'a"', 0, 2, base)
+        base.instantiation_count = 7
+        m1.instantiation_count = 2
+        m2.instantiation_count = 2
+        m3.instantiation_count = 2
+        m4.instantiation_count = 1
+        m5.instantiation_count = 1
+        assert base.current_fringe == dict.fromkeys([m1, m2, m3, m4])
+
+        base.cleanup_fringe()
+        assert base.current_fringe == dict.fromkeys([m1, m2, m3])
+        assert m4.state == MapBase.BLOCKED
+        assert m4.nextmap_first is None
+        assert m4.nextmap_all is None
+        assert m5.state == MapBase.BLOCKED
+        assert m5.nextmap_first is None
+        assert m5.nextmap_all is None
+
+    def test_deal_with_blocked(self):
+        w_a = self.space.newutf8("a", 1)
+        w_b = self.space.newutf8("b", 1)
+        w_c = self.space.newutf8("c", 1)
+        space = self.space
+        s = '{"a": 1, "b": 2, "c": 3}'
+        dec = JSONDecoder(space, s)
+        dec.startmap = base = Terminator(space)
+        m1 = base.get_next(w_a, 'a"', 0, 2, base)
+        m2 = m1.get_next(w_b, 'b"', 0, 2, base)
+        m2.mark_blocked(base)
+        w_res = dec.decode_object(1)
+        assert space.int_w(space.len(w_res)) == 3
+        assert space.int_w(space.getitem(w_res, w_a)) == 1
+        assert space.int_w(space.getitem(w_res, w_b)) == 2
+        assert space.int_w(space.getitem(w_res, w_c)) == 3
+        dec.close()
+
+    def test_deal_with_blocked_number_of_leaves(self):
+        w_a = self.space.newutf8("a", 1)
+        w_b = self.space.newutf8("b", 1)
+        w_x = self.space.newutf8("x", 1)
+        w_u = self.space.newutf8("u", 1)
+        space = self.space
+        base = Terminator(space)
+        m1 = base.get_next(w_a, 'a"', 0, 2, base)
+        m2 = m1.get_next(w_b, 'b"', 0, 2, base)
+        m2.get_next(w_x, 'x"', 0, 2, base)
+        m2.get_next(w_u, 'u"', 0, 2, base)
+        assert base.number_of_leaves == 2
+        m2.mark_blocked(base)
+        assert base.number_of_leaves == 1
+
+    def test_instatiation_count(self):
+        m = Terminator(self.space)
+        dec = JSONDecoder(self.space, '"abc" "def"')
+        m1 = dec.decode_key_map(dec.skip_whitespace(0), m)
+        m2 = dec.decode_key_map(dec.skip_whitespace(6), m1)
+        m1 = dec.decode_key_map(dec.skip_whitespace(0), m)
+        m2 = dec.decode_key_map(dec.skip_whitespace(6), m1)
+        m1 = dec.decode_key_map(dec.skip_whitespace(0), m)
+
+        assert m1.instantiation_count == 3
+        assert m2.instantiation_count == 2
+        dec.close()
+
 
 class AppTest(object):
     spaceconfig = {"objspace.usemodules._pypyjson": True}
@@ -55,7 +290,7 @@
         raises(ValueError, _pypyjson.loads, 'fa')
         raises(ValueError, _pypyjson.loads, 'f')
         raises(ValueError, _pypyjson.loads, 'falXX')
-        
+
 
     def test_decode_string(self):
         import _pypyjson
@@ -85,7 +320,7 @@
         import _pypyjson
         assert _pypyjson.loads(r'"\\"') == u'\\'
         assert _pypyjson.loads(r'"\""') == u'"'
-        assert _pypyjson.loads(r'"\/"') == u'/'       
+        assert _pypyjson.loads(r'"\/"') == u'/'
         assert _pypyjson.loads(r'"\b"') == u'\b'
         assert _pypyjson.loads(r'"\f"') == u'\f'
         assert _pypyjson.loads(r'"\n"') == u'\n'
@@ -101,12 +336,19 @@
         import _pypyjson
         s = r'"hello\nworld' # missing the trailing "
         raises(ValueError, "_pypyjson.loads(s)")
-        
+
     def test_escape_sequence_unicode(self):
         import _pypyjson
         s = r'"\u1234"'
         assert _pypyjson.loads(s) == u'\u1234'
 
+    def test_escape_sequence_mixed_with_utf8(self):
+        import _pypyjson
+        utf8 = u'&#228;"'.encode("utf-8")
+        assert _pypyjson.loads(r'"abc\\' + utf8) == u'abc\\&#228;'
+        assert _pypyjson.loads(r'"abc\"' + utf8) == u'abc"&#228;'
+        assert _pypyjson.loads(r'"def\u1234' + utf8) == u'def\u1234&#228;'
+
     def test_invalid_utf_8(self):
         import _pypyjson
         s = '"\xe0"' # this is an invalid UTF8 sequence inside a string
@@ -176,13 +418,18 @@
         s = '{"hello": "world", "aaa": "bbb"}'
         assert _pypyjson.loads(s) == {'hello': 'world',
                                       'aaa': 'bbb'}
+        assert _pypyjson.loads(s) == {'hello': 'world',
+                                      'aaa': 'bbb'}
         raises(ValueError, _pypyjson.loads, '{"key"')
         raises(ValueError, _pypyjson.loads, '{"key": 42')
 
+        assert _pypyjson.loads('{"neighborhood": ""}') == {
+            "neighborhood": ""}
+
     def test_decode_object_nonstring_key(self):
         import _pypyjson
         raises(ValueError, "_pypyjson.loads('{42: 43}')")
-        
+
     def test_decode_array(self):
         import _pypyjson
         assert _pypyjson.loads('[]') == []
@@ -263,3 +510,4 @@
         for inputtext, errmsg in test_cases:
             exc = raises(ValueError, _pypyjson.loads, inputtext)
             assert str(exc.value) == errmsg
+
diff --git a/pypy/module/_pypyjson/test/test_simd.py 
b/pypy/module/_pypyjson/test/test_simd.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/_pypyjson/test/test_simd.py
@@ -0,0 +1,110 @@
+import sys
+import pytest
+from rpython.rtyper.lltypesystem import lltype, rffi
+from rpython.rlib.rarithmetic import r_uint, intmask
+
+from pypy.module._pypyjson.simd import USE_SIMD
+from pypy.module._pypyjson.simd import find_end_of_string_slow
+from pypy.module._pypyjson.simd import find_end_of_string_slow_no_hash
+from pypy.module._pypyjson.simd import print_chars
+from pypy.module._pypyjson.simd import find_end_of_string_simd_unaligned, 
WORD_SIZE
+from pypy.module._pypyjson.simd import 
find_end_of_string_simd_unaligned_no_hash
+
+try:
+    from hypothesis import example, given, strategies
+except ImportError:
+    pytest.skip("missing hypothesis!")
+
+if not USE_SIMD:
+    pytest.skip("only implemented for 64 bit for now")
+
+def fill_to_word_size(res, ch=" "):
+    if len(res) % WORD_SIZE != 0:
+        res += ch * (WORD_SIZE - (len(res) % WORD_SIZE))
+    return res
+
+def string_to_word(s):
+    assert len(s) == WORD_SIZE
+    ll_chars, llobj, flag = rffi.get_nonmovingbuffer_ll_final_null(s)
+    try:
+        wordarray = rffi.cast(rffi.ULONGP, ll_chars)
+        return wordarray[0]
+    finally:
+        rffi.free_nonmovingbuffer_ll(ll_chars, llobj, flag)
+
+def ll(callable, string, *args):
+    ll_chars, llobj, flag = rffi.get_nonmovingbuffer_ll_final_null(string)
+    try:
+        return callable(ll_chars, *args)
+    finally:
+        rffi.free_nonmovingbuffer_ll(ll_chars, llobj, flag)
+
+word = strategies.builds(
+    r_uint, strategies.integers(min_value=-sys.maxint-1, max_value=sys.maxint))
+
+def build_string(prefix, content, end, suffix):
+    res = prefix + '"' + "".join([chr(x) for x in content]) + end + suffix
+    return fill_to_word_size(res), len(prefix) + 1
+
+string_in_context_strategy = strategies.builds(
+    build_string, prefix=strategies.binary(),
+    content=strategies.lists(strategies.integers(1, 255), min_size=1),
+    end=strategies.sampled_from('"\\\x00\x01'),
+    suffix=strategies.binary())
+
+def compare(string, res1, res2):
+    hash1, nonascii1, endindex1 = res1
+    hash2, nonascii2, endindex2 = res2
+    assert endindex1 == endindex2
+    if string[endindex1 - 1] == '"':
+        assert hash1 == hash2
+    assert nonascii1 == nonascii2
+
+
+@example(('"       \x80"      ', 1))
+@example(('"\x01"          ', 1))
+@example(('"aaaaaaaa"\x00\x00\x00\x00\x00\x00\x00       ', 1))
+@example(('"aaaaaaaa"      ', 1))
+@example(('"12"', 1))
+@example(('"1234567abcdefghAB"', 1))
+@example(('"1234567abcdefgh"', 1))
+@example((' "123456ABCDEF"        \x00', 2))
+@example((' "123456aaaaaaaaABCDEF"\x00', 2))
+@given(string_in_context_strategy)
+def test_find_end_of_string(a):
+    (string, startindex) = a
+    res = ll(find_end_of_string_slow, string, startindex, len(string))
+    hash, nonascii1, endposition1 = res
+    res2 = ll(find_end_of_string_slow_no_hash, string, startindex, len(string))
+    assert res2 == (nonascii1, endposition1)
+    ch = string[endposition1]
+    assert ch == '"' or ch == '\\' or ch < '\x20'
+    for ch in string[startindex:endposition1]:
+        assert not (ch == '"' or ch == '\\' or ch < '\x20')
+    compare(string, res, ll(find_end_of_string_simd_unaligned, string, 
startindex, len(string)))
+
+    nonascii2, endposition2 = ll(find_end_of_string_simd_unaligned_no_hash, 
string, startindex, len(string))
+    assert nonascii1 == nonascii2
+    assert endposition1 == endposition2
+
+@given(string_in_context_strategy, strategies.binary(min_size=1))
+def test_find_end_of_string_position_invariance(a, prefix):
+    fn = find_end_of_string_simd_unaligned
+    (string, startindex) = a
+    h1, nonascii1, i1 = ll(fn, string, startindex, len(string))
+    string2 = prefix + string
+    h2, nonascii2, i2 = ll(fn, string2, startindex + len(prefix), len(string) 
+ len(prefix))
+    assert h1 == h2
+    assert nonascii1 == nonascii2
+    assert i1 + len(prefix) == i2
+
+@given(string_in_context_strategy, strategies.binary(min_size=1))
+def test_find_end_of_string_position_invariance_no_hash(a, prefix):
+    fn = find_end_of_string_simd_unaligned_no_hash
+    (string, startindex) = a
+    nonascii1, i1 = ll(fn, string, startindex, len(string))
+    string2 = prefix + string
+    nonascii2, i2 = ll(fn, string2, startindex + len(prefix), len(string) + 
len(prefix))
+    assert nonascii1 == nonascii2
+    assert i1 + len(prefix) == i2
+
diff --git a/pypy/objspace/std/jsondict.py b/pypy/objspace/std/jsondict.py
new file mode 100644
--- /dev/null
+++ b/pypy/objspace/std/jsondict.py
@@ -0,0 +1,167 @@
+"""dict implementation specialized for object loaded by the _pypyjson module.
+
+Somewhat similar to MapDictStrategy, also uses a map.
+"""
+
+from rpython.rlib import jit, rerased, objectmodel, debug
+
+from pypy.objspace.std.dictmultiobject import (
+    UnicodeDictStrategy, DictStrategy,
+    create_iterator_classes, W_DictObject)
+
+
+def from_values_and_jsonmap(space, values_w, jsonmap):
+    if not objectmodel.we_are_translated():
+        assert len(values_w) == len(jsonmap.get_keys_in_order())
+        assert len(values_w) != 0
+    debug.make_sure_not_resized(values_w)
+    strategy = jsonmap.strategy_instance
+    if strategy is None:
+        jsonmap.strategy_instance = strategy = JsonDictStrategy(space, jsonmap)
+    storage = strategy.erase(values_w)
+    return W_DictObject(space, strategy, storage)
+
+def devolve_jsonmap_dict(w_dict):
+    assert isinstance(w_dict, W_DictObject)
+    strategy = w_dict.get_strategy()
+    assert isinstance(strategy, JsonDictStrategy)
+    strategy.switch_to_unicode_strategy(w_dict)
+
+def get_jsonmap_from_dict(w_dict):
+    assert isinstance(w_dict, W_DictObject)
+    strategy = w_dict.get_strategy()
+    assert isinstance(strategy, JsonDictStrategy)
+    return strategy.jsonmap
+
+class JsonDictStrategy(DictStrategy):
+    erase, unerase = rerased.new_erasing_pair("jsondict")
+    erase = staticmethod(erase)
+    unerase = staticmethod(unerase)
+
+    _immutable_fields_ = ['jsonmap']
+
+    def __init__(self, space, jsonmap):
+        DictStrategy.__init__(self, space)
+        self.jsonmap = jsonmap
+
+    def wrap(self, w_key):
+        return w_key
+
+    def wrapkey(space, key):
+        return key
+
+    def get_empty_storage(self):
+        raise NotImplementedError("should not be reachable")
+
+    def is_correct_type(self, w_obj):
+        space = self.space
+        return space.is_w(space.type(w_obj), space.w_unicode)
+
+    def _never_equal_to(self, w_lookup_type):
+        return False
+
+    def length(self, w_dict):
+        return len(self.unerase(w_dict.dstorage))
+
+    def getitem(self, w_dict, w_key):
+        if self.is_correct_type(w_key):
+            return self.getitem_unicode(w_dict, w_key)
+        else:
+            self.switch_to_unicode_strategy(w_dict)
+            return w_dict.getitem(w_key)
+
+    def getitem_unicode(self, w_dict, w_key):
+        storage_w = self.unerase(w_dict.dstorage)
+        if jit.isconstant(w_key):
+            jit.promote(self)
+        index = self.jsonmap.get_index(w_key)
+        if index == -1:
+            return None
+        return storage_w[index]
+
+    def setitem(self, w_dict, w_key, w_value):
+        if self.is_correct_type(w_key):
+            storage_w = self.unerase(w_dict.dstorage)
+            index = self.jsonmap.get_index(w_key)
+            if index != -1:
+                storage_w[index] = w_value
+                return
+        self.switch_to_unicode_strategy(w_dict)
+        w_dict.setitem(w_key, w_value)
+
+    def setdefault(self, w_dict, w_key, w_default):
+        if self.is_correct_type(w_key):
+            w_result = self.getitem_unicode(w_dict, w_key)
+            if w_result is not None:
+                return w_result
+        self.switch_to_unicode_strategy(w_dict)
+        return w_dict.setdefault(w_key, w_default)
+
+    def delitem(self, w_dict, w_key):
+        self.switch_to_unicode_strategy(w_dict)
+        return w_dict.delitem(w_key)
+
+    def popitem(self, w_dict):
+        self.switch_to_unicode_strategy(w_dict)
+        return w_dict.popitem()
+
+    def switch_to_unicode_strategy(self, w_dict):
+        strategy = self.space.fromcache(UnicodeDictStrategy)
+        values_w = self.unerase(w_dict.dstorage)
+        storage = strategy.get_empty_storage()
+        d_new = strategy.unerase(storage)
+        keys_in_order = self.jsonmap.get_keys_in_order()
+        assert len(keys_in_order) == len(values_w)
+        for index, w_key in enumerate(keys_in_order):
+            assert w_key is not None
+            assert type(w_key) is self.space.UnicodeObjectCls
+            d_new[w_key] = values_w[index]
+        w_dict.set_strategy(strategy)
+        w_dict.dstorage = storage
+
+    def w_keys(self, w_dict):
+        return self.space.newlist(self.jsonmap.get_keys_in_order())
+
+    def values(self, w_dict):
+        return self.unerase(w_dict.dstorage)[:]  # to make resizable
+
+    def items(self, w_dict):
+        space = self.space
+        storage_w = self.unerase(w_dict.dstorage)
+        res = [None] * len(storage_w)
+        for index, w_key in enumerate(self.jsonmap.get_keys_in_order()):
+            res[index] = space.newtuple([w_key, storage_w[index]])
+        return res
+
+    def getiterkeys(self, w_dict):
+        return iter(self.jsonmap.get_keys_in_order())
+
+    def getitervalues(self, w_dict):
+        storage_w = self.unerase(w_dict.dstorage)
+        return iter(storage_w)
+
+    def getiteritems_with_hash(self, w_dict):
+        storage_w = self.unerase(w_dict.dstorage)
+        return ZipItemsWithHash(self.jsonmap.get_keys_in_order(), storage_w)
+
+
+class ZipItemsWithHash(object):
+    def __init__(self, list1, list2):
+        assert len(list1) == len(list2)
+        self.list1 = list1
+        self.list2 = list2
+        self.i = 0
+
+    def __iter__(self):
+        return self
+
+    def next(self):
+        i = self.i
+        if i >= len(self.list1):
+            raise StopIteration
+        self.i = i + 1
+        w_key = self.list1[i]
+        return (w_key, self.list2[i], w_key.hash_w())
+
+
+create_iterator_classes(JsonDictStrategy)
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit

[pypy-commit] pypy default: merge json-decoder-maps:

Reply via email to