Author: Carl Friedrich Bolz-Tereick <[email protected]>
Branch: 
Changeset: r96755:f4e45930f401
Date: 2019-06-05 14:50 +0200
http://bitbucket.org/pypy/pypy/changeset/f4e45930f401/

Log:    Backed out changeset cefcc54d57f7

diff --git a/pypy/module/_pypyjson/interp_decoder.py b/pypy/module/_pypyjson/interp_decoder.py
--- a/pypy/module/_pypyjson/interp_decoder.py
+++ b/pypy/module/_pypyjson/interp_decoder.py
@@ -1,13 +1,11 @@
 import sys
 from rpython.rlib.rstring import StringBuilder
-from rpython.rlib.objectmodel import specialize, always_inline
-from rpython.rlib import rfloat, runicode, jit, objectmodel, rutf8
+from rpython.rlib.objectmodel import specialize, always_inline, r_dict
+from rpython.rlib import rfloat, rutf8
 from rpython.rtyper.lltypesystem import lltype, rffi
 from rpython.rlib.rarithmetic import r_uint
 from pypy.interpreter.error import oefmt
 from pypy.interpreter import unicodehelper
-from pypy.interpreter.baseobjspace import W_Root
-from pypy.module._pypyjson import simd
 
 OVF_DIGITS = len(str(sys.maxint))
 
@@ -17,85 +15,45 @@
 # precomputing negative powers of 10 is MUCH faster than using e.g. math.pow
 # at runtime
 NEG_POW_10 = [10.0**-i for i in range(16)]
-del i
-
 def neg_pow_10(x, exp):
     if exp >= len(NEG_POW_10):
         return 0.0
     return x * NEG_POW_10[exp]
 
-def _compare_cache_entry(space, res, ll_chars, start, length):
-    if length != len(res):
+def slice_eq(a, b):
+    (ll_chars1, start1, length1, _) = a
+    (ll_chars2, start2, length2, _) = b
+    if length1 != length2:
         return False
-    index = start
-    for c in res:
-        x = ord(c)
-        if not ll_chars[index] == chr(x):
+    j = start2
+    for i in range(start1, start1 + length1):
+        if ll_chars1[i] != ll_chars2[j]:
             return False
-        index += 1
+        j += 1
     return True
 
+def slice_hash(a):
+    (ll_chars, start, length, h) = a
+    return h
 
-class IntCache(object):
-    START = -10
-    END = 256
-
-    def __init__(self, space):
-        self.space = space
-        self.cache = [self.space.newint(i)
-                for i in range(self.START, self.END)]
-
-    def newint(self, intval):
-        if self.START <= intval < self.END:
-            return self.cache[intval - self.START]
-        return self.space.newint(intval)
-
-
-class JSONDecoder(W_Root):
-
-    LRU_SIZE = 16
-    LRU_MASK = LRU_SIZE - 1
-
-    DEFAULT_SIZE_SCRATCH = 20
-
-    MIN_SIZE_FOR_STRING_CACHE = 1024 * 1024
-
-
+TYPE_UNKNOWN = 0
+TYPE_STRING = 1
+class JSONDecoder(object):
     def __init__(self, space, s):
         self.space = space
-        self.w_empty_string = space.newutf8("", 0)
-
         self.s = s
-
         # we put our string in a raw buffer so:
         # 1) we automatically get the '\0' sentinel at the end of the string,
         #    which means that we never have to check for the "end of string"
         # 2) we can pass the buffer directly to strtod
-        self.ll_chars, self.flag = rffi.get_nonmovingbuffer_final_null(self.s)
+        self.ll_chars = rffi.str2charp(s)
         self.end_ptr = lltype.malloc(rffi.CCHARPP.TO, 1, flavor='raw')
         self.pos = 0
-        self.intcache = space.fromcache(IntCache)
-
-        self.cache = {}
-        self.cache_wrapped = {}
-
-        self.lru_cache = [0] * self.LRU_SIZE
-        self.lru_index = 0
-
-        self.startmap = self.space.fromcache(Terminator)
-        self.unclear_objects = []
-
-        self.scratch = [[None] * self.DEFAULT_SIZE_SCRATCH]  # list of scratch space
-
+        self.cache = r_dict(slice_eq, slice_hash, simple_hash_eq=True)
 
     def close(self):
-        rffi.free_nonmovingbuffer(self.s, self.ll_chars, self.flag)
+        rffi.free_charp(self.ll_chars)
         lltype.free(self.end_ptr, flavor='raw')
-        # clean up objects that are instances of now blocked maps
-        for w_obj in self.unclear_objects:
-            jsonmap = self._get_jsonmap_from_dict(w_obj)
-            if jsonmap.is_blocked():
-                self._devolve_jsonmap_dict(w_obj)
 
     def getslice(self, start, end):
         assert start >= 0
@@ -103,15 +61,18 @@
         return self.s[start:end]
 
     def skip_whitespace(self, i):
-        ll_chars = self.ll_chars
         while True:
-            ch = ll_chars[i]
+            ch = self.ll_chars[i]
             if is_whitespace(ch):
-                i += 1
+                i+=1
             else:
                 break
         return i
 
+    @specialize.arg(1)
+    def _raise(self, msg, *args):
+        raise oefmt(self.space.w_ValueError, msg, *args)
+
     def decode_any(self, i):
         i = self.skip_whitespace(i)
         ch = self.ll_chars[i]
@@ -141,11 +102,6 @@
             self._raise("No JSON object could be decoded: unexpected '%s' at char %d",
                         ch, i)
 
-
-    @specialize.arg(1)
-    def _raise(self, msg, *args):
-        raise oefmt(self.space.w_ValueError, msg, *args)
-
     def decode_null(self, i):
         if (self.ll_chars[i]   == 'u' and
             self.ll_chars[i+1] == 'l' and
@@ -206,7 +162,7 @@
             return self.decode_int_slow(start)
 
         self.pos = i
-        return self.intcache.newint(intval)
+        return self.space.newint(intval)
 
     def decode_float(self, i):
         from rpython.rlib import rdtoa
@@ -258,26 +214,6 @@
         ovf_maybe = (count >= OVF_DIGITS)
         return i, ovf_maybe, sign * intval
 
-    def _raise_control_char_in_string(self, ch, startindex, currindex):
-        if ch == '\0':
-            self._raise("Unterminated string starting at char %d",
-                        startindex - 1)
-        else:
-            self._raise("Invalid control character at char %d", currindex-1)
-
-    def _raise_object_error(self, ch, start, i):
-        if ch == '\0':
-            self._raise("Unterminated object starting at char %d", start)
-        else:
-            self._raise("Unexpected '%s' when decoding object (char %d)",
-                        ch, i)
-
-    def decode_surrogate_pair(self, i, highsurr):
-        """ uppon enter the following must hold:
-              chars[i] == "\\" and chars[i+1] == "u"
-        """
-        # the possible ValueError is caught by the caller
-
     def decode_array(self, i):
         w_list = self.space.newlist([])
         start = i
@@ -304,13 +240,6 @@
                 self._raise("Unexpected '%s' when decoding array (char %d)",
                             ch, i-1)
 
-    def decode_any_context(self, i, context):
-        i = self.skip_whitespace(i)
-        ch = self.ll_chars[i]
-        if ch == '"':
-            return self.decode_string(i+1, context)
-        return self.decode_any(i)
-
     def decode_object(self, i):
         start = i
 
@@ -319,124 +248,62 @@
             self.pos = i+1
             return self.space.newdict()
 
-        if self.scratch:
-            values_w = self.scratch.pop()
-        else:
-            values_w = [None] * self.DEFAULT_SIZE_SCRATCH
-        nextindex = 0
-        currmap = self.startmap
+        d = self._create_empty_dict()
         while True:
             # parse a key: value
-            currmap = self.decode_key(i, currmap)
+            w_name = self.decode_key(i)
             i = self.skip_whitespace(self.pos)
             ch = self.ll_chars[i]
             if ch != ':':
                 self._raise("No ':' found at char %d", i)
             i += 1
-
-            w_value = self.decode_any_context(i, currmap)
-
-            if nextindex == len(values_w):  # full
-                values_w = values_w + [None] * len(values_w)  # double
-            values_w[nextindex] = w_value
-            nextindex += 1
+            i = self.skip_whitespace(i)
+            #
+            w_value = self.decode_any(i)
+            d[w_name] = w_value
             i = self.skip_whitespace(self.pos)
             ch = self.ll_chars[i]
             i += 1
             if ch == '}':
                 self.pos = i
-                if currmap.is_blocked():
-                    currmap.instantiation_count += 1
-                    self.scratch.append(values_w)  # can reuse next time
-                    dict_w = self._switch_to_dict(currmap, values_w, nextindex)
-                    return self._create_dict(dict_w)
-                self.scratch.append(values_w)  # can reuse next time
-                values_w = values_w[:nextindex]
-                currmap.instantiation_count += 1
-                w_res = self._create_dict_map(values_w, currmap)
-                if currmap.state != MapBase.USEFUL:
-                    self.unclear_objects.append(w_res)
-                return w_res
+                return self._create_dict(d)
             elif ch == ',':
-                i = self.skip_whitespace(i)
-                if currmap.is_blocked():
-                    currmap.instantiation_count += 1
-                    self.scratch.append(values_w)  # can reuse next time
-                    dict_w = self._switch_to_dict(currmap, values_w, nextindex)
-                    return self.decode_object_dict(i, start, dict_w)
+                pass
+            elif ch == '\0':
+                self._raise("Unterminated object starting at char %d", start)
             else:
-                self._raise_object_error(ch, start, i - 1)
+                self._raise("Unexpected '%s' when decoding object (char %d)",
+                            ch, i-1)
 
-    def _create_dict_map(self, values_w, jsonmap):
-        from pypy.objspace.std.jsondict import from_values_and_jsonmap
-        return from_values_and_jsonmap(self.space, values_w, jsonmap)
-
-    def _devolve_jsonmap_dict(self, w_dict):
-        from pypy.objspace.std.jsondict import devolve_jsonmap_dict
-        devolve_jsonmap_dict(w_dict)
-
-    def _get_jsonmap_from_dict(self, w_dict):
-        from pypy.objspace.std.jsondict import get_jsonmap_from_dict
-        return get_jsonmap_from_dict(w_dict)
-
-    def _switch_to_dict(self, currmap, values_w, nextindex):
-        dict_w = self._create_empty_dict()
-        index = nextindex - 1
-        while isinstance(currmap, JSONMap):
-            dict_w[currmap.w_key] = values_w[index]
-            index -= 1
-            currmap = currmap.prev
-        assert len(dict_w) == nextindex
-        return dict_w
-
-    def decode_object_dict(self, i, start, dict_w):
+    def decode_string(self, i):
+        start = i
+        bits = 0
         while True:
-            # parse a key: value
-            w_key = self.decode_key_string(i)
-            i = self.skip_whitespace(self.pos)
-            ch = self.ll_chars[i]
-            if ch != ':':
-                self._raise("No ':' found at char %d", i)
-            i += 1
-
-            w_value = self.decode_any(i)
-            dict_w[w_key] = w_value
-            i = self.skip_whitespace(self.pos)
+            # this loop is a fast path for strings which do not contain escape
+            # characters
             ch = self.ll_chars[i]
             i += 1
-            if ch == '}':
+            bits |= ord(ch)
+            if ch == '"':
                 self.pos = i
-                return self._create_dict(dict_w)
-            elif ch == ',':
-                i = self.skip_whitespace(i)
-            else:
-                self._raise_object_error(ch, start, i - 1)
+                return self._create_string(start, i - 1, bits)
+            elif ch == '\\' or ch < '\x20':
+                self.pos = i-1
+                return self.decode_string_escaped(start)
 
-    def decode_string_uncached(self, i):
-        start = i
-        ll_chars = self.ll_chars
-        nonascii, i = simd.find_end_of_string_no_hash(ll_chars, i, len(self.s))
-        ch = ll_chars[i]
-        if ch == '\\':
-            self.pos = i
-            return self.decode_string_escaped(start, nonascii)
-        if ch < '\x20':
-            self._raise_control_char_in_string(ch, start, i)
+    def _create_string(self, start, end, bits):
+        if bits & 0x80:
+            # the 8th bit is set, it's an utf8 string
+            content_utf8 = self.getslice(start, end)
+            lgt = unicodehelper.check_utf8_or_raise(self.space,
+                                                          content_utf8)
+            return self.space.newutf8(content_utf8, lgt)
         else:
-            assert ch == '"'
-
-        self.pos = i + 1
-        return self._create_string_wrapped(start, i, nonascii)
-
-    def _create_string_wrapped(self, start, end, nonascii):
-        content = self.getslice(start, end)
-        if nonascii:
-            # contains non-ascii chars, we need to check that it's valid utf-8
-            lgt = unicodehelper.check_utf8_or_raise(self.space,
-                                                          content)
-        else:
-            lgt = end - start
-        return self.space.newutf8(content, lgt)
+            # ascii only, fast path (ascii is a strict subset of
+            # latin1, and we already checked that all the chars are <
+            # 128)
+            return self.space.newutf8(self.getslice(start, end),
+                                      end - start)
 
     def _create_dict(self, d):
         from pypy.objspace.std.dictmultiobject import from_unicode_key_dict
@@ -446,7 +313,8 @@
         from pypy.objspace.std.dictmultiobject import create_empty_unicode_key_dict
         return create_empty_unicode_key_dict(self.space)
 
-    def decode_string_escaped(self, start, nonascii):
+
+    def decode_string_escaped(self, start):
         i = self.pos
         builder = StringBuilder((i - start) * 2) # just an estimate
         assert start >= 0
@@ -457,21 +325,25 @@
             i += 1
             if ch == '"':
                 content_utf8 = builder.build()
-                length = unicodehelper.check_utf8_or_raise(self.space,
+                lgt = unicodehelper.check_utf8_or_raise(self.space,
                                                            content_utf8)
                 self.pos = i
-                return self.space.newutf8(content_utf8, length)
+                return self.space.newutf8(content_utf8, lgt)
             elif ch == '\\':
-                i = self.decode_escape_sequence_to_utf8(i, builder)
+                i = self.decode_escape_sequence(i, builder)
             elif ch < '\x20':
-                self._raise_control_char_in_string(ch, start, i)
+                if ch == '\0':
+                    self._raise("Unterminated string starting at char %d",
+                                start - 1)
+                else:
+                    self._raise("Invalid control character at char %d", i-1)
             else:
                 builder.append(ch)
 
-    def decode_escape_sequence_to_utf8(self, i, stringbuilder):
+    def decode_escape_sequence(self, i, builder):
         ch = self.ll_chars[i]
         i += 1
-        put = stringbuilder.append
+        put = builder.append
         if ch == '\\':  put('\\')
         elif ch == '"': put('"' )
         elif ch == '/': put('/' )
@@ -481,37 +353,22 @@
         elif ch == 'r': put('\r')
         elif ch == 't': put('\t')
         elif ch == 'u':
-            # may be a suggorate pair
-            return self.decode_escape_sequence_unicode(i, stringbuilder)
+            return self.decode_escape_sequence_unicode(i, builder)
         else:
             self._raise("Invalid \\escape: %s (char %d)", ch, i-1)
         return i
 
-    def _get_int_val_from_hex4(self, i):
-        ll_chars = self.ll_chars
-        res = 0
-        for i in range(i, i + 4):
-            ch = ord(ll_chars[i])
-            if ord('a') <= ch <= ord('f'):
-                digit = ch - ord('a') + 10
-            elif ord('A') <= ch <= ord('F'):
-                digit = ch - ord('A') + 10
-            elif ord('0') <= ch <= ord('9'):
-                digit = ch - ord('0')
-            else:
-                raise ValueError
-            res = (res << 4) + digit
-        return res
-
     def decode_escape_sequence_unicode(self, i, builder):
         # at this point we are just after the 'u' of the \u1234 sequence.
         start = i
         i += 4
+        hexdigits = self.getslice(start, i)
         try:
-            val = self._get_int_val_from_hex4(start)
+            val = int(hexdigits, 16)
             if (0xd800 <= val <= 0xdbff and
                     self.ll_chars[i] == '\\' and self.ll_chars[i+1] == 'u'):
-                lowsurr = self._get_int_val_from_hex4(i + 2)
+                hexdigits = self.getslice(i+2, i+6)
+                lowsurr = int(hexdigits, 16)
                 if 0xdc00 <= lowsurr <= 0xdfff:
                     # decode surrogate pair
                     val = 0x10000 + (((val - 0xd800) << 10) |
@@ -526,531 +383,45 @@
         builder.append(utf8_ch)
         return i
 
+    def decode_key(self, i):
+        """ returns a wrapped unicode """
+        from rpython.rlib.rarithmetic import intmask
 
-    def decode_string(self, i, context=None):
-        ll_chars = self.ll_chars
-        start = i
-        ch = ll_chars[i]
-        if ch == '"':
-            self.pos = i + 1
-            return self.w_empty_string # surprisingly common
-
-        cache = True
-        if context is not None:
-            context.decoded_strings += 1
-            if not context.should_cache():
-                cache = False
-        if len(self.s) < self.MIN_SIZE_FOR_STRING_CACHE:
-            cache = False
-
-        if not cache:
-            return self.decode_string_uncached(i)
-
-        strhash, nonascii, i = simd.find_end_of_string(ll_chars, i, len(self.s))
-        ch = ll_chars[i]
-        if ch == '\\':
-            self.pos = i
-            return self.decode_string_escaped(start, nonascii)
-        if ch < '\x20':
-            self._raise_control_char_in_string(ch, start, i)
-        else:
-            assert ch == '"'
-
-        self.pos = i + 1
-
-        length = i - start
-        strhash ^= length
-
-        # check cache first:
-        try:
-            entry = self.cache_wrapped[strhash]
-        except KeyError:
-            w_res = self._create_string_wrapped(start, i, nonascii)
-            # only add *some* strings to the cache, because keeping them all is
-            # way too expensive
-            if (context is not None and context.decoded_strings < 200) or strhash in self.lru_cache:
-                entry = WrappedCacheEntry(
-                        self.getslice(start, start + length), w_res)
-                self.cache_wrapped[strhash] = entry
-            else:
-                self.lru_cache[self.lru_index] = strhash
-                self.lru_index = (self.lru_index + 1) & self.LRU_MASK
-            return w_res
-        if not _compare_cache_entry(self.space, entry.repr, ll_chars, start, length):
-            # hopefully rare
-            return self._create_string_wrapped(start, i, nonascii)
-        if context is not None:
-            context.cache_hits += 1
-        return entry.w_uni
-
-    def decode_key(self, i, currmap):
-        newmap = self._decode_key(i, currmap)
-        currmap.observe_transition(newmap)
-        return newmap
-
-    def _decode_key(self, i, currmap):
-        ll_chars = self.ll_chars
-        nextmap = currmap.fast_path_key_parse(self, i)
-        if nextmap is not None:
-            return nextmap
-
-        start = i
-        ch = ll_chars[i]
-        if ch != '"':
-            self._raise("Key name must be string at char %d", i)
-        i += 1
-        w_key = self._decode_key_string(i)
-        return currmap.get_next(w_key, self.s, start, self.pos)
-
-    def _decode_key_string(self, i):
-        ll_chars = self.ll_chars
-        start = i
-
-        strhash, nonascii, i = simd.find_end_of_string(ll_chars, i, len(self.s))
-
-        ch = ll_chars[i]
-        if ch == '\\':
-            self.pos = i
-            w_key = self.decode_string_escaped(start, nonascii)
-            return w_key
-        if ch < '\x20':
-            self._raise_control_char_in_string(ch, start, i)
-        length = i - start
-        strhash ^= length
-        self.pos = i + 1
-        # check cache first:
-        try:
-            entry = self.cache[strhash]
-        except KeyError:
-            w_res = self._create_string_wrapped(start, i, nonascii)
-            entry = WrappedCacheEntry(
-                    self.getslice(start, start + length), w_res)
-            self.cache[strhash] = entry
-            return w_res
-        if not _compare_cache_entry(self.space, entry.repr, ll_chars, start, length):
-            # hopefully rare
-            w_res = self._create_string_wrapped(start, i, nonascii)
-            print w_res
-        else:
-            w_res = entry.w_uni
-        return w_res
-
-    def decode_key_string(self, i):
+        i = self.skip_whitespace(i)
         ll_chars = self.ll_chars
         ch = ll_chars[i]
         if ch != '"':
             self._raise("Key name must be string at char %d", i)
         i += 1
-        return self._decode_key_string(i)
 
-class WrappedCacheEntry(object):
-    def __init__(self, repr, w_uni):
-        self.repr = repr
-        self.w_uni = w_uni
-
-
-class MapBase(object):
-    # the basic problem we are trying to solve is the following: dicts in
-    # json can either be used as objects, or as dictionaries with arbitrary
-    # string keys. We want to use maps for the former, but not for the
-    # latter. But we don't know in advance which kind of dict is which.
-
-    # Therefore we create "preliminary" maps where we aren't quite sure yet
-    # whether they are really useful maps or not. If we see them used often
-    # enough, we promote them to "useful" maps, which we will actually
-    # instantiate objects with.
-
-    # If we determine that a map is not used often enough, we can turn it
-    # into a "blocked" map, which is a point in the map tree where we will
-    # switch to regular dicts, when we reach that part of the tree.
-
-    # allowed graph edges or nodes in all_next:
-    #    USEFUL -------
-    #   /      \       \
-    #  v        v       v
-    # FRINGE   USEFUL   BLOCKED
-    #  |
-    #  v
-    # PRELIMINARY
-    #  |
-    #  v
-    # PRELIMINARY
-
-    # state transitions:
-    #   PRELIMINARY
-    #   /   |       \
-    #   |   v        v
-    #   | FRINGE -> USEFUL
-    #   |   |
-    #   \   |
-    #    v  v
-    #   BLOCKED
-
-    # the single_nextmap edge can only be these graph edges:
-    #  USEFUL
-    #   |
-    #   v
-    #  USEFUL
-    #
-    #  FRINGE
-    #   |
-    #   v
-    #  PRELIMINARY
-    #   |
-    #   v
-    #  PRELIMINARY
-
-    USEFUL = 'u'
-    PRELIMINARY = 'p'
-    FRINGE = 'f' # buffer between PRELIMINARY and USEFUL
-    BLOCKED = 'b'
-
-    # tunable parameters
-    MAX_FRINGE = 40
-    USEFUL_THRESHOLD = 5
-
-    def __init__(self, space):
-        self.space = space
-
-        # a single transition is stored in .single_nextmap
-        self.single_nextmap = None
-
-        # all_next is only initialized after seeing the *second* transition
-        # but then it also contains .single_nextmap
-        self.all_next = None # later dict {key: nextmap}
-
-        self.instantiation_count = 0
-        self.number_of_leaves = 1
-
-    def get_terminator(self):
-        while isinstance(self, JSONMap):
-            self = self.prev
-        assert isinstance(self, Terminator)
-        return self
-
-    def _check_invariants(self):
-        if self.all_next:
-            for next in self.all_next.itervalues():
-                next._check_invariants()
-        elif self.single_nextmap:
-            self.single_nextmap._check_invariants()
-
-    def get_next(self, w_key, string, start, stop):
-        from pypy.objspace.std.dictmultiobject import unicode_hash, unicode_eq
-        if isinstance(self, JSONMap):
-            assert not self.state == MapBase.BLOCKED
-        single_nextmap = self.single_nextmap
-        if (single_nextmap is not None and
-                single_nextmap.w_key.eq_w(w_key)):
-            return single_nextmap
-
-        assert stop >= 0
-        assert start >= 0
-
-        if single_nextmap is None:
-            # first transition ever seen, don't initialize all_next
-            next = self._make_next_map(w_key, string[start:stop])
-            self.single_nextmap = next
+        start = i
+        bits = 0
+        strhash = ord(ll_chars[i]) << 7
+        while True:
+            ch = ll_chars[i]
+            i += 1
+            if ch == '"':
+                break
+            elif ch == '\\' or ch < '\x20':
+                self.pos = i-1
+                return self.decode_string_escaped(start)
+            strhash = intmask((1000003 * strhash) ^ ord(ll_chars[i]))
+            bits |= ord(ch)
+        length = i - start - 1
+        if length == 0:
+            strhash = -1
         else:
-            if self.all_next is None:
-                self.all_next = objectmodel.r_dict(unicode_eq, unicode_hash,
-                  force_non_null=True, simple_hash_eq=True)
-                self.all_next[single_nextmap.w_key] = single_nextmap
-            else:
-                next = self.all_next.get(w_key, None)
-                if next is not None:
-                    return next
-            next = self._make_next_map(w_key, string[start:stop])
-            self.all_next[w_key] = next
-
-            # fix number_of_leaves
-            self.change_number_of_leaves(1)
-
-        terminator = self.get_terminator()
-        terminator.register_potential_fringe(next)
-        return next
-
-    def change_number_of_leaves(self, difference):
-        parent = self
-        while isinstance(parent, JSONMap):
-            parent.number_of_leaves += difference
-            parent = parent.prev
-        parent.number_of_leaves += difference # terminator
-
-    def fast_path_key_parse(self, decoder, position):
-        single_nextmap = self.single_nextmap
-        if single_nextmap:
-            ll_chars = decoder.ll_chars
-            assert isinstance(single_nextmap, JSONMap)
-            if single_nextmap.key_repr_cmp(ll_chars, position):
-                decoder.pos = position + len(single_nextmap.key_repr)
-                return single_nextmap
-
-    def observe_transition(self, newmap):
-        """ observe a transition from self to newmap.
-        This does a few things, including updating the self size estimate with
-        the knowledge that one object transitioned from self to newmap.
-        also it potentially decides that self should move to state USEFUL."""
-        self.instantiation_count += 1
-        if isinstance(self, JSONMap) and self.state == MapBase.FRINGE:
-            if self.is_useful():
-                self.mark_useful()
-
-    def _make_next_map(self, w_key, key_repr):
-        return JSONMap(self.space, self, w_key, key_repr)
-
-    def _all_dot(self, output):
-        identity = objectmodel.compute_unique_id(self)
-        output.append('%s [shape=box%s];' % (identity, self._get_dot_text()))
-        if self.all_next:
-            for w_key, value in self.all_next.items():
-                assert isinstance(value, JSONMap)
-                if value is self.single_nextmap:
-                    color = ", color=blue"
-                else:
-                    color = ""
-                output.append('%s -> %s [label="%s"%s];' % (
-                    identity, objectmodel.compute_unique_id(value), value.w_key._utf8, color))
-                value._all_dot(output)
-        elif self.single_nextmap is not None:
-            value = self.single_nextmap
-            output.append('%s -> %s [label="%s", color=blue];' % (
-                identity, objectmodel.compute_unique_id(value), value.w_key._utf8))
-            value._all_dot(output)
-
-
-    def _get_dot_text(self):
-        return ", label=base"
-
-    def view(self):
-        from dotviewer import graphclient
-        import pytest
-        r = ["digraph G {"]
-        self._all_dot(r)
-        r.append("}")
-        p = pytest.ensuretemp("resilientast").join("temp.dot")
-        p.write("\n".join(r))
-        graphclient.display_dot_file(str(p))
-
-    def _get_caching_stats(self):
-        caching = 0
-        num_maps = 1
-        if isinstance(self, JSONMap) and self.should_cache() and self.decoded_strings > 200:
-            caching += 1
-
-        if self.all_next:
-            children = self.all_next.values()
-        elif self.single_nextmap:
-            children = [self.single_nextmap]
-        else:
-            children = []
-        for child in children:
-            a, b = child._get_caching_stats()
-            caching += a
-            num_maps += b
-        return caching, num_maps
-
-class Terminator(MapBase):
-    def __init__(self, space):
-        MapBase.__init__(self, space)
-        self.all_object_count = 0
-        self.current_fringe = {}
-
-    def register_potential_fringe(self, prelim):
-        prev = prelim.prev
-        if (isinstance(prev, Terminator) or
-                isinstance(prev, JSONMap) and prev.state == MapBase.USEFUL):
-            prelim.state = MapBase.FRINGE
-
-            if len(self.current_fringe) > MapBase.MAX_FRINGE:
-                self.cleanup_fringe()
-            self.current_fringe[prelim] = None
-
-    def cleanup_fringe(self):
-        min_fringe = None
-        min_avg = 10000000000
-        for f in self.current_fringe:
-            if f.state == MapBase.FRINGE:
-                avg = f.average_instantiation()
-                if avg < min_avg:
-                    min_avg = avg
-                    min_fringe = f
-            else:
-                for f in self.current_fringe.keys():
-                    if f.state != MapBase.FRINGE:
-                        del self.current_fringe[f]
-                return
-        assert min_fringe
-        min_fringe.mark_blocked()
-        del self.current_fringe[min_fringe]
-
-
-class JSONMap(MapBase):
-    """ A map implementation to speed up parsing """
-
-    def __init__(self, space, prev, w_key, key_repr):
-        MapBase.__init__(self, space)
-
-        self.prev = prev
-        self.w_key = w_key
-        self.key_repr = key_repr
-
-        self.state = MapBase.PRELIMINARY
-
-        # key decoding stats
-        self.decoded_strings = 0
-        self.cache_hits = 0
-
-        # for jsondict support
-        self.key_to_index = None
-        self.keys_in_order = None
-        self.strategy_instance = None
-
-    @jit.elidable
-    def get_terminator(self):
-        while isinstance(self, JSONMap):
-            self = self.prev
-        assert isinstance(self, Terminator)
-        return self
-
-    def _check_invariants(self):
-        assert self.state in (
-            MapBase.USEFUL,
-            MapBase.PRELIMINARY,
-            MapBase.FRINGE,
-            MapBase.BLOCKED,
-        )
-
-        prev = self.prev
-        if isinstance(prev, JSONMap):
-            prevstate = prev.state
-        else:
-            prevstate = MapBase.USEFUL
-
-        if prevstate == MapBase.USEFUL:
-            assert self.state != MapBase.PRELIMINARY
-        elif prevstate == MapBase.PRELIMINARY:
-            assert self.state == MapBase.PRELIMINARY
-        elif prevstate == MapBase.FRINGE:
-            assert self.state == MapBase.PRELIMINARY
-        else:
-            # if prevstate is BLOCKED, we shouldn't have recursed here!
-            assert False, "should be unreachable"
-
-        if self.state == MapBase.BLOCKED:
-            assert self.single_nextmap is None
-            assert self.all_next is None
-
-        MapBase._check_invariants(self)
-
-    def mark_useful(self):
-        # mark self as useful, and also the most commonly instantiated
-        # children, recursively
-        assert self.state in (MapBase.FRINGE, MapBase.PRELIMINARY)
-        self.state = MapBase.USEFUL
-        maxchild = self.single_nextmap
-        if self.all_next is not None:
-            for child in self.all_next.itervalues():
-                if child.instantiation_count > maxchild.instantiation_count:
-                    maxchild = child
-        if maxchild is not None:
-            maxchild.mark_useful()
-            if self.all_next:
-                terminator = self.get_terminator()
-                for child in self.all_next.itervalues():
-                    if child is not maxchild:
-                        terminator.register_potential_fringe(child)
-                self.single_nextmap = maxchild
-
-    def mark_blocked(self):
-        self.state = MapBase.BLOCKED
-        if self.all_next:
-            for next in self.all_next.itervalues():
-                next.mark_blocked()
-        elif self.single_nextmap:
-            self.single_nextmap.mark_blocked()
-        self.single_nextmap = None
-        self.all_next = None
-        self.change_number_of_leaves(-self.number_of_leaves + 1)
-
-    def is_blocked(self):
-        return self.state == MapBase.BLOCKED
-
-    def average_instantiation(self):
-        return self.instantiation_count / float(self.number_of_leaves)
-
-    def is_useful(self):
-        return self.average_instantiation() > self.USEFUL_THRESHOLD
-
-    def should_cache(self):
-        return not (self.decoded_strings > 200 and self.cache_hits * 4 < self.decoded_strings)
-
-    def key_repr_cmp(self, ll_chars, i):
-        for j, c in enumerate(self.key_repr):
-            if ll_chars[i] != c:
-                return False
-            i += 1
-        return True
-
-    # _____________________________________________________
-    # methods for JsonDictStrategy
-
-    @jit.elidable
-    def get_index(self, w_key):
-        from pypy.objspace.std.unicodeobject import W_UnicodeObject
-        assert isinstance(w_key, W_UnicodeObject)
-        return self.get_key_to_index().get(w_key, -1)
-
-    def get_key_to_index(self):
-        from pypy.objspace.std.dictmultiobject import unicode_hash, unicode_eq
-        key_to_index = self.key_to_index
-        if key_to_index is None:
-            key_to_index = self.key_to_index = objectmodel.r_dict(unicode_eq, unicode_hash,
-                  force_non_null=True, simple_hash_eq=True)
-            # compute depth
-            curr = self
-            depth = 0
-            while True:
-                depth += 1
-                curr = curr.prev
-                if not isinstance(curr, JSONMap):
-                    break
-
-            curr = self
-            while depth:
-                depth -= 1
-                key_to_index[curr.w_key] = depth
-                curr = curr.prev
-                if not isinstance(curr, JSONMap):
-                    break
-        return key_to_index
-
-    def get_keys_in_order(self):
-        keys_in_order = self.keys_in_order
-        if keys_in_order is None:
-            key_to_index = self.get_key_to_index()
-            keys_in_order = self.keys_in_order = [None] * len(key_to_index)
-            for w_key, index in key_to_index.iteritems():
-                keys_in_order[index] = w_key
-        return keys_in_order
-
-    # _____________________________________________________
-
-    def _get_dot_text(self):
-        if self.all_next is None:
-            l = int(self.single_nextmap is not None)
-        else:
-            l = len(self.all_next) + 1
-        extra = ""
-        if self.decoded_strings:
-            extra = "\\n%s/%s (%s%%)" % (self.cache_hits, self.decoded_strings, self.cache_hits/float(self.decoded_strings))
-        res = ', label="#%s\\nchildren: %s%s"' % (self.instantiation_count, l, extra)
-        if self.state == MapBase.BLOCKED:
-            res += ", fillcolor=lightsalmon"
-        if self.state == MapBase.FRINGE:
-            res += ", fillcolor=lightgray"
-        if self.state == MapBase.PRELIMINARY:
-            res += ", fillcolor=lightslategray"
+            strhash ^= length
+            strhash = intmask(strhash)
+        self.pos = i
+        # check cache first:
+        key = (ll_chars, start, length, strhash)
+        try:
+            return self.cache[key]
+        except KeyError:
+            pass
+        res = self._create_string(start, i - 1, bits)
+        self.cache[key] = res
         return res
 
 
@@ -1071,4 +442,3 @@
         return w_res
     finally:
         decoder.close()
-
diff --git a/pypy/module/_pypyjson/simd.py b/pypy/module/_pypyjson/simd.py
deleted file mode 100644
--- a/pypy/module/_pypyjson/simd.py
+++ /dev/null
@@ -1,218 +0,0 @@
-from rpython.rtyper.lltypesystem import lltype, rffi
-from rpython.rlib import objectmodel, unroll
-from rpython.rlib.rarithmetic import r_uint, intmask, LONG_BIT
-from rpython.jit.backend.detect_cpu import autodetect
-
-# accelerators for string operations using simd on regular word sizes (*not*
-# SSE instructions). this style is sometimes called SWAR (SIMD Within A
-# Register) or "broadword techniques"
-
-# XXX remove wordsize and endianness restrictions properly, so far only x86-64
-# is tested
-
-USE_SIMD = False
-if LONG_BIT == 64:
-    WORD_SIZE = 8
-    EVERY_BYTE_ONE = 0x0101010101010101
-    EVERY_BYTE_HIGHEST_BIT = 0x8080808080808080
-    if autodetect() == "x86-64":
-        USE_SIMD = True
-else:
-    WORD_SIZE = 4
-    EVERY_BYTE_ONE = 0x01010101
-    EVERY_BYTE_HIGHEST_BIT = 0x80808080
-
-
-# helpers
-
-unrolling_wordsize = unroll.unrolling_iterable(range(WORD_SIZE))
-
-def char_repeated_word_width(ch):
-    return r_uint(EVERY_BYTE_ONE) * ord(ch)
-
-def any_char_zero(word):
-    return (word - r_uint(EVERY_BYTE_ONE)) & ~word & r_uint(EVERY_BYTE_HIGHEST_BIT)
-
-def any_char_in_words_zero(*words):
-    return _any_char_in_any_word_zero_accum(0, *words)
-
-def _any_char_in_any_word_zero_accum(accum, word, *words):
-    accum |= (word - r_uint(EVERY_BYTE_ONE)) & ~word
-    if not words:
-        return accum & r_uint(EVERY_BYTE_HIGHEST_BIT)
-    return _any_char_in_any_word_zero_accum(accum, *words)
-
-def print_chars(word):
-    # for debugging
-    out = ''
-    for i in range(WORD_SIZE):
-        out += chr(word & 0xff)
-        word >>= 8
-    return out
-
-def index_nonzero(word):
-    # XXX can be done very cheap in theory
-    assert word
-    for i in unrolling_wordsize:
-        if word & 0xff:
-            return i
-        word >>= 8
-    assert 0
-
-def index_zero(word):
-    # XXX can be done very cheap in theory
-    assert any_char_zero(word)
-    for i in unrolling_wordsize:
-        if not word & 0xff:
-            return i
-        word >>= 8
-    assert 0 # XXX ???
-
-def splice_words(word, offset, other):
-    mask = ((~r_uint(0)) << (8 * offset))
-    return (word & mask) | (other & ~mask)
-
-
-
[email protected]_inline
-def position_string_ender(word):
-    maskquote = char_repeated_word_width('"')
-    maskbackslash = char_repeated_word_width('\\')
-    maskx20 = char_repeated_word_width(chr(0xff - 0x1f))
-    # x1 and x2 check for equality, if a byte is 0 the corresponding
-    # char is equal to " or \
-    x1 = maskquote ^ word
-    x2 = maskbackslash ^ word
-    # x3 checks for char < 0x20, the byte is 0 in that case
-    x3 = maskx20 & word
-    return any_char_in_words_zero(x1, x2, x3)
-
[email protected]_inline
-def find_end_of_string_simd_unaligned(ll_chars, startpos, length):
-    ch = ll_chars[startpos]
-    strhash = (ord(ch) << 7) ^ 0x345678
-
-    wordarray = rffi.cast(rffi.ULONGP, rffi.ptradd(ll_chars, startpos))
-    num_safe_reads = (length - startpos) // WORD_SIZE
-
-    bits = 0
-    for i in range(num_safe_reads):
-        word = wordarray[i]
-        cond = position_string_ender(word)
-        if cond:
-            break
-        bits |= word
-        strhash = intmask((1000003 * strhash) ^ intmask(word))
-    else:
-        # didn't find end of string yet, look at remaining chars
-        word = 0
-        shift = 0
-        i = 0
-        for i in range(num_safe_reads * WORD_SIZE + startpos, length + 1):
-            ch = ll_chars[i]
-            if ch == '"' or ch == '\\' or ch < '\x20':
-                break
-            bits |= ord(ch)
-            word |= ord(ch) << shift
-            shift += 8
-        if shift:
-            strhash = intmask((1000003 * strhash) ^ intmask(word))
-
-        nonascii = bool(bits & char_repeated_word_width(chr(0x80)))
-        return strhash, nonascii, i
-
-    # compute endposition
-    nonzero = index_nonzero(cond)
-    endposition = startpos + i * WORD_SIZE + nonzero
-    if nonzero:
-        word = splice_words(r_uint(0), nonzero, word)
-        bits |= word
-        strhash = intmask((1000003 * strhash) ^ intmask(word))
-
-    nonascii = bool(bits & char_repeated_word_width(chr(0x80)))
-
-    return strhash, nonascii, endposition
-
[email protected]_inline
-def find_end_of_string_simd_unaligned_no_hash(ll_chars, startpos, length):
-    ch = ll_chars[startpos]
-
-    wordarray = rffi.cast(rffi.ULONGP, rffi.ptradd(ll_chars, startpos))
-    num_safe_reads = (length - startpos) // WORD_SIZE
-
-    bits = 0
-    for i in range(num_safe_reads):
-        word = wordarray[i]
-        cond = position_string_ender(word)
-        if cond:
-            break
-        bits |= word
-    else:
-        # didn't find end of string yet, look at remaining chars
-        word = 0
-        shift = 0
-        i = 0
-        for i in range(num_safe_reads * WORD_SIZE + startpos, length + 1):
-            ch = ll_chars[i]
-            if ch == '"' or ch == '\\' or ch < '\x20':
-                break
-            bits |= ord(ch)
-            word |= ord(ch) << shift
-            shift += WORD_SIZE
-
-        nonascii = bool(bits & char_repeated_word_width(chr(0x80)))
-        return nonascii, i
-
-    # compute endposition
-    nonzero = index_nonzero(cond)
-    endposition = startpos + i * WORD_SIZE + nonzero
-    if nonzero:
-        word = splice_words(r_uint(0), nonzero, word)
-        bits |= word
-
-    nonascii = bool(bits & char_repeated_word_width(chr(0x80)))
-
-    return nonascii, endposition
-
-
[email protected]_inline
-def find_end_of_string_slow(ll_chars, i, length):
-    ch = ll_chars[i]
-    strhash = (ord(ch) << 7) ^ 0x345678
-    word = 0
-    shift = 0
-
-    bits = 0
-
-    while True:
-        # this loop is a fast path for strings which do not contain escape
-        # characters
-        ch = ll_chars[i]
-        if ch == '"' or ch == '\\' or ch < '\x20':
-            break
-        i += 1
-        bits |= ord(ch)
-
-        word |= ord(ch) << shift
-        shift += WORD_SIZE
-        if shift == WORD_SIZE * WORD_SIZE:
-            strhash = intmask((1000003 * strhash) ^ word)
-            shift = 0
-            word = 0
-
-    if shift:
-        strhash = intmask((1000003 * strhash) ^ word)
-    return strhash, bool(bits & 0x80), i
-
-if USE_SIMD:
-    find_end_of_string = find_end_of_string_simd_unaligned
-    find_end_of_string_no_hash = find_end_of_string_simd_unaligned_no_hash
-else:
-    find_end_of_string = find_end_of_string_slow
-
-    @objectmodel.always_inline
-    def find_end_of_string_no_hash(ll_chars, i, length):
-        _, nonascii, i = find_end_of_string_slow(ll_chars, i, length)
-        return (nonascii, i)
-
-
diff --git a/pypy/module/_pypyjson/test/test__pypyjson.py b/pypy/module/_pypyjson/test/test__pypyjson.py
--- a/pypy/module/_pypyjson/test/test__pypyjson.py
+++ b/pypy/module/_pypyjson/test/test__pypyjson.py
@@ -1,253 +1,31 @@
 # -*- encoding: utf-8 -*-
-import pytest
-from pypy.module._pypyjson.interp_decoder import JSONDecoder, Terminator, MapBase
-from rpython.rtyper.lltypesystem import lltype, rffi
+from pypy.module._pypyjson.interp_decoder import JSONDecoder
 
+def test_skip_whitespace():
+    s = '   hello   '
+    dec = JSONDecoder('fake space', s)
+    assert dec.pos == 0
+    assert dec.skip_whitespace(0) == 3
+    assert dec.skip_whitespace(3) == 3
+    assert dec.skip_whitespace(8) == len(s)
+    dec.close()
 
-class TestJson(object):
-    def test_skip_whitespace(self):
-        s = '   hello   '
-        dec = JSONDecoder(self.space, s)
-        assert dec.pos == 0
-        assert dec.skip_whitespace(0) == 3
-        assert dec.skip_whitespace(3) == 3
-        assert dec.skip_whitespace(8) == len(s)
-        dec.close()
+class FakeSpace(object):
+    def newutf8(self, s, l):
+        return s
 
-    def test_json_map(self):
-        m = Terminator(self.space)
-        w_a = self.space.newutf8("a", 1)
-        w_b = self.space.newutf8("b", 1)
-        w_c = self.space.newutf8("c", 1)
-        m1 = m.get_next(w_a, '"a"', 0, 3)
-        assert m1.w_key == w_a
-        assert m1.single_nextmap is None
-        assert m1.key_repr == '"a"'
-        assert m1.key_repr_cmp('"a": 123', 0)
-        assert not m1.key_repr_cmp('b": 123', 0)
-        assert m.single_nextmap.w_key == w_a
-
-        m2 = m.get_next(w_a, '"a"', 0, 3)
-        assert m2 is m1
-
-        m3 = m.get_next(w_b, '"b"', 0, 3)
-        assert m3.w_key == w_b
-        assert m3.single_nextmap is None
-        assert m3.key_repr == '"b"'
-        assert m.single_nextmap is m1
-
-        m4 = m3.get_next(w_c, '"c"', 0, 3)
-        assert m4.w_key == w_c
-        assert m4.single_nextmap is None
-        assert m4.key_repr == '"c"'
-        assert m3.single_nextmap is m4
-
-    def test_json_map_get_index(self):
-        m = Terminator(self.space)
-        w_a = self.space.newutf8("a", 1)
-        w_b = self.space.newutf8("b", 1)
-        w_c = self.space.newutf8("c", 1)
-        m1 = m.get_next(w_a, 'a"', 0, 2)
-        assert m1.get_index(w_a) == 0
-        assert m1.get_index(w_b) == -1
-
-        m2 = m.get_next(w_b, 'b"', 0, 2)
-        assert m2.get_index(w_b) == 0
-        assert m2.get_index(w_a) == -1
-
-        m3 = m2.get_next(w_c, 'c"', 0, 2)
-        assert m3.get_index(w_b) == 0
-        assert m3.get_index(w_c) == 1
-        assert m3.get_index(w_a) == -1
-
-    def test_decode_key(self):
-        m = Terminator(self.space)
-        m_diff = Terminator(self.space)
-        for s1 in ["abc", "1001" * 10, u"ä".encode("utf-8")]:
-            s = ' "%s"   "%s" "%s"' % (s1, s1, s1)
-            dec = JSONDecoder(self.space, s)
-            assert dec.pos == 0
-            m1 = dec.decode_key(dec.skip_whitespace(0), m)
-            assert m1.w_key._utf8 == s1
-            assert m1.key_repr == '"%s"' % s1
-
-            # check caching on w_key level
-            m2 = dec.decode_key(dec.skip_whitespace(dec.pos), m_diff)
-            assert m1.w_key is m2.w_key
-
-            # check caching on map level
-            m3 = dec.decode_key(dec.skip_whitespace(dec.pos), m_diff)
-            assert m3 is m2
-            dec.close()
-
-    def test_decode_string_caching(self):
-        for s1 in ["abc", u"ä".encode("utf-8")]:
-            s = '"%s"   "%s"    "%s"' % (s1, s1, s1)
-            dec = JSONDecoder(self.space, s)
-            dec.MIN_SIZE_FOR_STRING_CACHE = 0
-            assert dec.pos == 0
-            w_x = dec.decode_string(1)
-            w_y = dec.decode_string(dec.skip_whitespace(dec.pos) + 1)
-            assert w_x is not w_y
-            # check caching
-            w_z = dec.decode_string(dec.skip_whitespace(dec.pos) + 1)
-            assert w_z is w_y
-            dec.close()
-
-    def _make_some_maps(self):
-        # base -> m1 -> m2 -> m3
-        #                \-> m4
-        w_a = self.space.newutf8("a", 1)
-        w_b = self.space.newutf8("b", 1)
-        w_c = self.space.newutf8("c", 1)
-        w_d = self.space.newutf8("d", 1)
-        base = Terminator(self.space)
-        base.instantiation_count = 6
-        m1 = base.get_next(w_a, 'a"', 0, 2)
-        m2 = m1.get_next(w_b, 'b"', 0, 2)
-        m3 = m2.get_next(w_c, 'c"', 0, 2)
-        m4 = m2.get_next(w_d, 'd"', 0, 2)
-        return base, m1, m2, m3, m4
-
-    # unit tests for map state transistions
-    def test_fringe_to_useful(self):
-        base, m1, m2, m3, m4 = self._make_some_maps()
-        base.instantiation_count = 6
-        assert m1.state == MapBase.FRINGE
-        m1.instantiation_count = 6
-
-        assert m2.state == MapBase.PRELIMINARY
-        m2.instantiation_count = 6
-
-        assert m3.state == MapBase.PRELIMINARY
-        m3.instantiation_count = 2
-        assert m2.single_nextmap is m3
-
-        assert m4.state == MapBase.PRELIMINARY
-        m4.instantiation_count = 4
-
-        m1.mark_useful()
-        assert m1.state == MapBase.USEFUL
-        assert m2.state == MapBase.USEFUL
-        assert m3.state == MapBase.FRINGE
-        assert m4.state == MapBase.USEFUL
-        assert m2.single_nextmap is m4
-
-        assert m1.number_of_leaves == 2
-        base._check_invariants()
-
-    def test_number_of_leaves(self):
-        w_x = self.space.newutf8("x", 1)
-        base, m1, m2, m3, m4 = self._make_some_maps()
-        assert base.number_of_leaves == 2
-        assert m1.number_of_leaves == 2
-        assert m2.number_of_leaves == 2
-        assert m3.number_of_leaves == 1
-        assert m4.number_of_leaves == 1
-        m5 = m2.get_next(w_x, 'x"', 0, 2)
-        assert base.number_of_leaves == 3
-        assert m1.number_of_leaves == 3
-        assert m2.number_of_leaves == 3
-        assert m5.number_of_leaves == 1
-
-    def test_cleanup_fringe_simple(self):
-        base, m1, m2, m3, m4 = self._make_some_maps()
-        base.instantiation_count = 6
-        assert m1.state == MapBase.FRINGE
-        m1.instantiation_count = 6
-        m2.instantiation_count = 6
-        m3.instantiation_count = 2
-        m4.instantiation_count = 4
-        assert base.current_fringe == {m1: None}
-
-        m1.mark_useful()
-        assert base.current_fringe == {m1: None, m3: None} # not cleaned up
-        base.cleanup_fringe()
-        assert base.current_fringe == {m3: None}
-
-    def test_cleanup_fringe_block(self):
-        w_a = self.space.newutf8("a", 1)
-        w_b = self.space.newutf8("b", 1)
-        w_c = self.space.newutf8("c", 1)
-        w_d = self.space.newutf8("d", 1)
-        base = Terminator(self.space)
-        base.instantiation_count = 6
-        m1 = base.get_next(w_a, 'a"', 0, 2)
-        m2 = base.get_next(w_b, 'b"', 0, 2)
-        m3 = base.get_next(w_c, 'c"', 0, 2)
-        m4 = base.get_next(w_d, 'd"', 0, 2)
-        m5 = m4.get_next(w_a, 'a"', 0, 2)
-        base.instantiation_count = 7
-        m1.instantiation_count = 2
-        m2.instantiation_count = 2
-        m3.instantiation_count = 2
-        m4.instantiation_count = 1
-        m5.instantiation_count = 1
-        assert base.current_fringe == dict.fromkeys([m1, m2, m3, m4])
-
-        base.cleanup_fringe()
-        assert base.current_fringe == dict.fromkeys([m1, m2, m3])
-        assert m4.state == MapBase.BLOCKED
-        assert m4.single_nextmap is None
-        assert m4.all_next is None
-        assert m5.state == MapBase.BLOCKED
-        assert m5.single_nextmap is None
-        assert m5.all_next is None
-
-    def test_deal_with_blocked(self):
-        w_a = self.space.newutf8("a", 1)
-        w_b = self.space.newutf8("b", 1)
-        w_c = self.space.newutf8("c", 1)
-        space = self.space
-        s = '{"a": 1, "b": 2, "c": 3}'
-        dec = JSONDecoder(space, s)
-        dec.startmap = base = Terminator(space)
-        m1 = base.get_next(w_a, 'a"', 0, 2)
-        m2 = m1.get_next(w_b, 'b"', 0, 2)
-        m2.mark_blocked()
-        w_res = dec.decode_object(1)
-        assert space.int_w(space.len(w_res)) == 3
-        assert space.int_w(space.getitem(w_res, w_a)) == 1
-        assert space.int_w(space.getitem(w_res, w_b)) == 2
-        assert space.int_w(space.getitem(w_res, w_c)) == 3
-        dec.close()
-
-    def test_deal_with_blocked_number_of_leaves(self):
-        w_a = self.space.newutf8("a", 1)
-        w_b = self.space.newutf8("b", 1)
-        w_x = self.space.newutf8("x", 1)
-        w_u = self.space.newutf8("u", 1)
-        space = self.space
-        base = Terminator(space)
-        m1 = base.get_next(w_a, 'a"', 0, 2)
-        m2 = m1.get_next(w_b, 'b"', 0, 2)
-        m2.get_next(w_x, 'x"', 0, 2)
-        m2.get_next(w_u, 'u"', 0, 2)
-        assert base.number_of_leaves == 2
-        m2.mark_blocked()
-        assert base.number_of_leaves == 1
-
-    @pytest.mark.skip()
-    def test_caching_stats(self):
-        w_a = self.space.newutf8("a", 1)
-        w_b = self.space.newutf8("b", 1)
-        w_x = self.space.newutf8("x", 1)
-        w_u = self.space.newutf8("u", 1)
-        space = self.space
-        base = Terminator(space)
-        m1 = base.get_next(w_a, 'a"', 0, 2)
-        m2 = m1.get_next(w_b, 'b"', 0, 2)
-        m2.get_next(w_x, 'x"', 0, 2)
-        m2.get_next(w_u, 'u"', 0, 2)
-        m1.decode_string = 300
-        m1.cache_hits = 0
-        m3 = base.get_next(w_b, '"b"', 0, 3)
-        m3.decode_string = 300
-        m3.cache_hits = 300
-        caching_maps, total_maps = base._get_caching_stats()
-        assert caching_maps == 5
-        assert total_maps == 6
-
+def test_decode_key():
+    s1 = "123" * 100
+    s = ' "%s"   "%s" ' % (s1, s1)
+    dec = JSONDecoder(FakeSpace(), s)
+    assert dec.pos == 0
+    x = dec.decode_key(0)
+    assert x == s1
+    # check caching
+    y = dec.decode_key(dec.pos)
+    assert y == s1
+    assert y is x
+    dec.close()
 
 class AppTest(object):
     spaceconfig = {"objspace.usemodules._pypyjson": True}
@@ -277,7 +55,7 @@
         raises(ValueError, _pypyjson.loads, 'fa')
         raises(ValueError, _pypyjson.loads, 'f')
         raises(ValueError, _pypyjson.loads, 'falXX')
-
+        
 
     def test_decode_string(self):
         import _pypyjson
@@ -307,7 +85,7 @@
         import _pypyjson
         assert _pypyjson.loads(r'"\\"') == u'\\'
         assert _pypyjson.loads(r'"\""') == u'"'
-        assert _pypyjson.loads(r'"\/"') == u'/'
+        assert _pypyjson.loads(r'"\/"') == u'/'       
         assert _pypyjson.loads(r'"\b"') == u'\b'
         assert _pypyjson.loads(r'"\f"') == u'\f'
         assert _pypyjson.loads(r'"\n"') == u'\n'
@@ -323,19 +101,12 @@
         import _pypyjson
         s = r'"hello\nworld' # missing the trailing "
         raises(ValueError, "_pypyjson.loads(s)")
-
+        
     def test_escape_sequence_unicode(self):
         import _pypyjson
         s = r'"\u1234"'
         assert _pypyjson.loads(s) == u'\u1234'
 
-    def test_escape_sequence_mixed_with_utf8(self):
-        import _pypyjson
-        utf8 = u'ä"'.encode("utf-8")
-        assert _pypyjson.loads(r'"abc\\' + utf8) == u'abc\\ä'
-        assert _pypyjson.loads(r'"abc\"' + utf8) == u'abc"ä'
-        assert _pypyjson.loads(r'"def\u1234' + utf8) == u'def\u1234ä'
-
     def test_invalid_utf_8(self):
         import _pypyjson
         s = '"\xe0"' # this is an invalid UTF8 sequence inside a string
@@ -405,18 +176,13 @@
         s = '{"hello": "world", "aaa": "bbb"}'
         assert _pypyjson.loads(s) == {'hello': 'world',
                                       'aaa': 'bbb'}
-        assert _pypyjson.loads(s) == {'hello': 'world',
-                                      'aaa': 'bbb'}
         raises(ValueError, _pypyjson.loads, '{"key"')
         raises(ValueError, _pypyjson.loads, '{"key": 42')
 
-        assert _pypyjson.loads('{"neighborhood": ""}') == {
-            "neighborhood": ""}
-
     def test_decode_object_nonstring_key(self):
         import _pypyjson
         raises(ValueError, "_pypyjson.loads('{42: 43}')")
-
+        
     def test_decode_array(self):
         import _pypyjson
         assert _pypyjson.loads('[]') == []
@@ -497,4 +263,3 @@
         for inputtext, errmsg in test_cases:
             exc = raises(ValueError, _pypyjson.loads, inputtext)
             assert str(exc.value) == errmsg
-
diff --git a/pypy/module/_pypyjson/test/test_simd.py b/pypy/module/_pypyjson/test/test_simd.py
deleted file mode 100644
--- a/pypy/module/_pypyjson/test/test_simd.py
+++ /dev/null
@@ -1,104 +0,0 @@
-import sys
-import pytest
-from rpython.rtyper.lltypesystem import lltype, rffi
-from rpython.rlib.rarithmetic import r_uint, intmask
-
-from pypy.module._pypyjson.simd import USE_SIMD
-from pypy.module._pypyjson.simd import find_end_of_string_slow
-from pypy.module._pypyjson.simd import print_chars
-from pypy.module._pypyjson.simd import find_end_of_string_simd_unaligned, WORD_SIZE
-from pypy.module._pypyjson.simd import find_end_of_string_simd_unaligned_no_hash
-
-from hypothesis import example, given, strategies
-
-if not USE_SIMD:
-    pytest.skip("only implemented for 64 bit for now")
-
-def fill_to_word_size(res, ch=" "):
-    if len(res) % WORD_SIZE != 0:
-        res += ch * (WORD_SIZE - (len(res) % WORD_SIZE))
-    return res
-
-def string_to_word(s):
-    assert len(s) == WORD_SIZE
-    ll_chars, flag = rffi.get_nonmovingbuffer_final_null(s)
-    try:
-        wordarray = rffi.cast(rffi.ULONGP, ll_chars)
-        return wordarray[0]
-    finally:
-        rffi.free_nonmovingbuffer(s, ll_chars, flag)
-
-def ll(callable, string, *args):
-    ll_chars, flag = rffi.get_nonmovingbuffer_final_null(string)
-    try:
-        return callable(ll_chars, *args)
-    finally:
-        rffi.free_nonmovingbuffer(string, ll_chars, flag)
-
-word = strategies.builds(
-    r_uint, strategies.integers(min_value=-sys.maxint-1, max_value=sys.maxint))
-
-def build_string(prefix, content, end, suffix):
-    res = prefix + '"' + "".join([chr(x) for x in content]) + end + suffix
-    return fill_to_word_size(res), len(prefix) + 1
-
-string_in_context_strategy = strategies.builds(
-    build_string, prefix=strategies.binary(),
-    content=strategies.lists(strategies.integers(1, 255), min_size=1),
-    end=strategies.sampled_from('"\\\x00\x01'),
-    suffix=strategies.binary())
-
-def compare(string, res1, res2):
-    hash1, nonascii1, endindex1 = res1
-    hash2, nonascii2, endindex2 = res2
-    assert endindex1 == endindex2
-    if string[endindex1 - 1] == '"':
-        assert hash1 == hash2
-    assert nonascii1 == nonascii2
-
-
-@example(('"       \x80"      ', 1))
-@example(('"\x01"          ', 1))
-@example(('"aaaaaaaa"\x00\x00\x00\x00\x00\x00\x00       ', 1))
-@example(('"aaaaaaaa"      ', 1))
-@example(('"12"', 1))
-@example(('"1234567abcdefghAB"', 1))
-@example(('"1234567abcdefgh"', 1))
-@example((' "123456ABCDEF"        \x00', 2))
-@example((' "123456aaaaaaaaABCDEF"\x00', 2))
-@given(string_in_context_strategy)
-def test_find_end_of_string(a):
-    (string, startindex) = a
-    res = ll(find_end_of_string_slow, string, startindex, len(string))
-    hash, nonascii1, endposition1 = res
-    ch = string[endposition1]
-    assert ch == '"' or ch == '\\' or ch < '\x20'
-    for ch in string[startindex:endposition1]:
-        assert not (ch == '"' or ch == '\\' or ch < '\x20')
-    compare(string, res, ll(find_end_of_string_simd_unaligned, string, startindex, len(string)))
-
-    nonascii2, endposition2 = ll(find_end_of_string_simd_unaligned_no_hash, string, startindex, len(string))
-    assert nonascii1 == nonascii2
-    assert endposition1 == endposition2
-
-@given(string_in_context_strategy, strategies.binary(min_size=1))
-def test_find_end_of_string_position_invariance(a, prefix):
-    fn = find_end_of_string_simd_unaligned
-    (string, startindex) = a
-    h1, nonascii1, i1 = ll(fn, string, startindex, len(string))
-    string2 = prefix + string
-    h2, nonascii2, i2 = ll(fn, string2, startindex + len(prefix), len(string) + len(prefix))
-    assert h1 == h2
-    assert nonascii1 == nonascii2
-    assert i1 + len(prefix) == i2
-
-@given(string_in_context_strategy, strategies.binary(min_size=1))
-def test_find_end_of_string_position_invariance_no_hash(a, prefix):
-    fn = find_end_of_string_simd_unaligned_no_hash
-    (string, startindex) = a
-    nonascii1, i1 = ll(fn, string, startindex, len(string))
-    string2 = prefix + string
-    nonascii2, i2 = ll(fn, string2, startindex + len(prefix), len(string) + len(prefix))
-    assert nonascii1 == nonascii2
-    assert i1 + len(prefix) == i2
-
diff --git a/pypy/objspace/std/jsondict.py b/pypy/objspace/std/jsondict.py
deleted file mode 100644
--- a/pypy/objspace/std/jsondict.py
+++ /dev/null
@@ -1,167 +0,0 @@
-"""dict implementation specialized for object loaded by the _pypyjson module.
-
-Somewhat similar to MapDictStrategy, also uses a map.
-"""
-
-from rpython.rlib import jit, rerased, objectmodel, debug
-
-from pypy.objspace.std.dictmultiobject import (
-    UnicodeDictStrategy, DictStrategy,
-    create_iterator_classes, W_DictObject)
-
-
-def from_values_and_jsonmap(space, values_w, jsonmap):
-    if not objectmodel.we_are_translated():
-        assert len(values_w) == len(jsonmap.get_keys_in_order())
-        assert len(values_w) != 0
-    debug.make_sure_not_resized(values_w)
-    strategy = jsonmap.strategy_instance
-    if strategy is None:
-        jsonmap.strategy_instance = strategy = JsonDictStrategy(space, jsonmap)
-    storage = strategy.erase(values_w)
-    return W_DictObject(space, strategy, storage)
-
-def devolve_jsonmap_dict(w_dict):
-    assert isinstance(w_dict, W_DictObject)
-    strategy = w_dict.get_strategy()
-    assert isinstance(strategy, JsonDictStrategy)
-    strategy.switch_to_unicode_strategy(w_dict)
-
-def get_jsonmap_from_dict(w_dict):
-    assert isinstance(w_dict, W_DictObject)
-    strategy = w_dict.get_strategy()
-    assert isinstance(strategy, JsonDictStrategy)
-    return strategy.jsonmap
-
-class JsonDictStrategy(DictStrategy):
-    erase, unerase = rerased.new_erasing_pair("jsondict")
-    erase = staticmethod(erase)
-    unerase = staticmethod(unerase)
-
-    _immutable_fields_ = ['jsonmap']
-
-    def __init__(self, space, jsonmap):
-        DictStrategy.__init__(self, space)
-        self.jsonmap = jsonmap
-
-    def wrap(self, w_key):
-        return w_key
-
-    def wrapkey(space, key):
-        return key
-
-    def get_empty_storage(self):
-        raise NotImplementedError("should not be reachable")
-
-    def is_correct_type(self, w_obj):
-        space = self.space
-        return space.is_w(space.type(w_obj), space.w_unicode)
-
-    def _never_equal_to(self, w_lookup_type):
-        return False
-
-    def length(self, w_dict):
-        return len(self.unerase(w_dict.dstorage))
-
-    def getitem(self, w_dict, w_key):
-        if self.is_correct_type(w_key):
-            return self.getitem_unicode(w_dict, w_key)
-        else:
-            self.switch_to_unicode_strategy(w_dict)
-            return w_dict.getitem(w_key)
-
-    def getitem_unicode(self, w_dict, w_key):
-        storage_w = self.unerase(w_dict.dstorage)
-        if jit.isconstant(w_key):
-            jit.promote(self)
-        index = self.jsonmap.get_index(w_key)
-        if index == -1:
-            return None
-        return storage_w[index]
-
-    def setitem(self, w_dict, w_key, w_value):
-        if self.is_correct_type(w_key):
-            storage_w = self.unerase(w_dict.dstorage)
-            index = self.jsonmap.get_index(w_key)
-            if index != -1:
-                storage_w[index] = w_value
-                return
-        self.switch_to_unicode_strategy(w_dict)
-        w_dict.setitem(w_key, w_value)
-
-    def setdefault(self, w_dict, w_key, w_default):
-        if self.is_correct_type(w_key):
-            w_result = self.getitem_unicode(w_dict, w_key)
-            if w_result is not None:
-                return w_result
-        self.switch_to_unicode_strategy(w_dict)
-        return w_dict.setdefault(w_key, w_default)
-
-    def delitem(self, w_dict, w_key):
-        self.switch_to_unicode_strategy(w_dict)
-        return w_dict.delitem(w_key)
-
-    def popitem(self, w_dict):
-        self.switch_to_unicode_strategy(w_dict)
-        return w_dict.popitem()
-
-    def switch_to_unicode_strategy(self, w_dict):
-        strategy = self.space.fromcache(UnicodeDictStrategy)
-        values_w = self.unerase(w_dict.dstorage)
-        storage = strategy.get_empty_storage()
-        d_new = strategy.unerase(storage)
-        keys_in_order = self.jsonmap.get_keys_in_order()
-        assert len(keys_in_order) == len(values_w)
-        for index, w_key in enumerate(keys_in_order):
-            assert w_key is not None
-            assert type(w_key) is self.space.UnicodeObjectCls
-            d_new[w_key] = values_w[index]
-        w_dict.set_strategy(strategy)
-        w_dict.dstorage = storage
-
-    def w_keys(self, w_dict):
-        return self.space.newlist(self.jsonmap.get_keys_in_order())
-
-    def values(self, w_dict):
-        return self.unerase(w_dict.dstorage)[:]  # to make resizable
-
-    def items(self, w_dict):
-        space = self.space
-        storage_w = self.unerase(w_dict.dstorage)
-        res = [None] * len(storage_w)
-        for index, w_key in enumerate(self.jsonmap.get_keys_in_order()):
-            res[index] = space.newtuple([w_key, storage_w[index]])
-        return res
-
-    def getiterkeys(self, w_dict):
-        return iter(self.jsonmap.get_keys_in_order())
-
-    def getitervalues(self, w_dict):
-        storage_w = self.unerase(w_dict.dstorage)
-        return iter(storage_w)
-
-    def getiteritems_with_hash(self, w_dict):
-        storage_w = self.unerase(w_dict.dstorage)
-        return ZipItemsWithHash(self.jsonmap.get_keys_in_order(), storage_w)
-
-
-class ZipItemsWithHash(object):
-    def __init__(self, list1, list2):
-        assert len(list1) == len(list2)
-        self.list1 = list1
-        self.list2 = list2
-        self.i = 0
-
-    def __iter__(self):
-        return self
-
-    def next(self):
-        i = self.i
-        if i >= len(self.list1):
-            raise StopIteration
-        self.i = i + 1
-        w_key = self.list1[i]
-        return (w_key, self.list2[i], w_key.hash_w())
-
-
-create_iterator_classes(JsonDictStrategy)
diff --git a/pypy/objspace/std/test/test_jsondict.py b/pypy/objspace/std/test/test_jsondict.py
deleted file mode 100644
--- a/pypy/objspace/std/test/test_jsondict.py
+++ /dev/null
@@ -1,89 +0,0 @@
-
-class AppTest(object):
-    spaceconfig = {"objspace.usemodules._pypyjson": True}
-
-    def test_check_strategy(self):
-        import __pypy__
-        import _pypyjson
-
-        d = _pypyjson.loads('{"a": 1}')
-        assert __pypy__.strategy(d) == "JsonDictStrategy"
-        d = _pypyjson.loads('{}')
-        assert __pypy__.strategy(d) == "EmptyDictStrategy"
-
-    def test_simple(self):
-        import __pypy__
-        import _pypyjson
-
-        d = _pypyjson.loads('{"a": 1, "b": "x"}')
-        assert len(d) == 2
-        assert d[u"a"] == 1
-        assert d[u"b"] == u"x"
-        assert u"c" not in d
-
-        d[u"a"] = 5
-        assert d[u"a"] == 5
-        assert __pypy__.strategy(d) == "JsonDictStrategy"
-
-        # devolve it
-        assert not 1 in d
-        assert __pypy__.strategy(d) == "UnicodeDictStrategy"
-        assert len(d) == 2
-        assert d[u"a"] == 5
-        assert d[u"b"] == u"x"
-        assert u"c" not in d
-
-    def test_setdefault(self):
-        import __pypy__
-        import _pypyjson
-
-        d = _pypyjson.loads('{"a": 1, "b": "x"}')
-        assert d.setdefault(u"a", "blub") == 1
-        d.setdefault(u"x", 23)
-        assert __pypy__.strategy(d) == "UnicodeDictStrategy"
-        assert len(d) == 3
-        assert d == {u"a": 1, u"b": "x", u"x": 23}
-
-    def test_delitem(self):
-        import __pypy__
-        import _pypyjson
-
-        d = _pypyjson.loads('{"a": 1, "b": "x"}')
-        del d[u"a"]
-        assert __pypy__.strategy(d) == "UnicodeDictStrategy"
-        assert len(d) == 1
-        assert d == {u"b": "x"}
-
-    def test_popitem(self):
-        import __pypy__
-        import _pypyjson
-
-        d = _pypyjson.loads('{"a": 1, "b": "x"}')
-        k, v = d.popitem()
-        assert __pypy__.strategy(d) == "UnicodeDictStrategy"
-        if k == u"a":
-            assert v == 1
-            assert len(d) == 1
-            assert d == {u"b": "x"}
-        else:
-            assert v == u"x"
-            assert len(d) == 1
-            assert d == {u"a": 1}
-
-    def test_keys_value_items(self):
-        import __pypy__
-        import _pypyjson
-
-        d = _pypyjson.loads('{"a": 1, "b": "x"}')
-        assert d.keys() == [u"a", u"b"]
-        assert d.values() == [1, u"x"]
-        assert d.items() == [(u"a", 1), (u"b", u"x")]
-
-    def test_iter_keys_value_items(self):
-        import __pypy__
-        import _pypyjson
-
-        d = _pypyjson.loads('{"a": 1, "b": "x"}')
-        assert list(d.iterkeys()) == [u"a", u"b"]
-        assert list(d.itervalues()) == [1, u"x"]
-        assert list(d.iteritems()) == [(u"a", 1), (u"b", u"x")]
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to