Author: Carl Friedrich Bolz-Tereick <[email protected]>
Branch: json-decoder-maps
Changeset: r97543:5a9168e9cb01
Date: 2019-09-19 19:40 +0200
http://bitbucket.org/pypy/pypy/changeset/5a9168e9cb01/

Log:    some comments, rename a few attributes

diff --git a/pypy/module/_pypyjson/interp_decoder.py b/pypy/module/_pypyjson/interp_decoder.py
--- a/pypy/module/_pypyjson/interp_decoder.py
+++ b/pypy/module/_pypyjson/interp_decoder.py
@@ -88,15 +88,14 @@
 
         # two caches, one for keys, one for general strings. they both have the
         # form {hash-as-int: StringCacheEntry} and they don't deal with
-        # collisions at all. For every hash there is simply one string stored.
-        #
-        # <antocuni> if I understand it correctly, the point is not only to
-        # cache the wrapped strings, but also to avoid to memcpy them out of
-        # the original string in the common case. Maybe this should be
-        # explained here.
-        self.cache = {}
-        self.cache_wrapped = {}
+        # collisions at all. For every hash there is simply one string stored,
+        # and colliding strings simply don't get cached.
+        self.cache_keys = {}
+        self.cache_values = {}
 
+        # we don't cache *all* non-key strings, because that would be too
+        # expensive. instead, keep a cache of the last 16 string hashes around
+        # and add a string to the cache only if its hash is seen a second time.
         self.lru_cache = [0] * self.LRU_SIZE
         self.lru_index = 0
 
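The LRU gating described in the comment above amounts to a tiny ring buffer of
recently seen hashes. A standalone plain-Python sketch, not part of the
changeset (the class name is invented for the example; the real decoder keeps
lru_cache, lru_index and LRU_MASK directly as attributes):

    class RecentHashes(object):
        """Remember the last N string hashes in a fixed-size ring buffer."""

        def __init__(self, size=16):
            assert size & (size - 1) == 0  # must be a power of two for the mask trick
            self.mask = size - 1
            self.hashes = [0] * size
            self.index = 0

        def seen_recently(self, strhash):
            # a linear scan over 16 ints is cheap
            return strhash in self.hashes

        def record(self, strhash):
            # overwrite the oldest slot
            self.hashes[self.index] = strhash
            self.index = (self.index + 1) & self.mask

A string whose hash seen_recently() already knows gets promoted into the real
string cache; otherwise record() just notes the hash, and only a later
occurrence of the same hash makes the string cacheable.
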
@@ -553,7 +552,8 @@
     def decode_string(self, i, contextmap=None):
         """ Decode a string at position i (which is right after the opening ").
         Optionally pass a contextmap, if the value is decoded as the value of a
-        dict. """
+        dict."""
+
         ll_chars = self.ll_chars
         start = i
         ch = ll_chars[i]
@@ -563,6 +563,14 @@
 
         cache = True
         if contextmap is not None:
+            # keep some statistics about the usefulness of the string cache on
+            # the contextmap.
+            # the intuition about the contextmap is as follows:
+            # often there are string values stored in dictionaries that can
+            # never be usefully cached, like unique ids of objects. Then the
+            # strings *in those fields* of all objects should never be cached.
+            # However, the content of other fields can still be useful to
+            # cache.
             contextmap.decoded_strings += 1
             if not contextmap.should_cache_strings():
                 cache = False
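should_cache_strings() itself is not touched by this diff. Judging from the
per-map cache_hits and decoded_strings counters (the same ones printed by
_get_dot_text further down), a plausible shape of the heuristic is the
following sketch; the constant values are invented for the example and the
real policy may differ:

    STRING_CACHE_EVALUATION_SIZE = 1000   # assumed value; the real constant lives on the decoder
    STRING_CACHE_USEFULNESS_FACTOR = 4    # hypothetical threshold, not from the diff

    def should_cache_strings(self):
        # sketch: while still gathering statistics, keep caching; afterwards
        # keep caching only if enough lookups actually hit the cache
        if self.decoded_strings < STRING_CACHE_EVALUATION_SIZE:
            return True
        return self.cache_hits * STRING_CACHE_USEFULNESS_FACTOR > self.decoded_strings
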
@@ -589,17 +597,19 @@
 
         # check cache first:
         try:
-            entry = self.cache_wrapped[strhash]
+            entry = self.cache_values[strhash]
         except KeyError:
             w_res = self._create_string_wrapped(start, i, nonascii)
             # only add *some* strings to the cache, because keeping them all is
-            # way too expensive
+            # way too expensive. first we check if the contextmap has caching
+            # disabled completely. if not, we check whether we have recently
+            # seen the same hash already; if so, we cache the string.
             if ((contextmap is not None and
+                        contextmap.decoded_strings < self.STRING_CACHE_EVALUATION_SIZE) or
                     strhash in self.lru_cache):
                 entry = StringCacheEntry(
                         self.getslice(start, start + length), w_res)
-                self.cache_wrapped[strhash] = entry
+                self.cache_values[strhash] = entry
             else:
                 self.lru_cache[self.lru_index] = strhash
                 self.lru_index = (self.lru_index + 1) & self.LRU_MASK
@@ -620,7 +630,7 @@
 
     def _decode_key_map(self, i, currmap):
         ll_chars = self.ll_chars
-        # first try to see whether we happen to find currmap.single_nextmap
+        # first try to see whether we happen to find currmap.nextmap_first
         nextmap = currmap.fast_path_key_parse(self, i)
         if nextmap is not None:
             return nextmap
@@ -653,12 +663,12 @@
         self.pos = i + 1
         # check cache first:
         try:
-            entry = self.cache[strhash]
+            entry = self.cache_keys[strhash]
         except KeyError:
             w_res = self._create_string_wrapped(start, i, nonascii)
             entry = StringCacheEntry(
                     self.getslice(start, start + length), w_res)
-            self.cache[strhash] = entry
+            self.cache_keys[strhash] = entry
             return w_res
         if not entry.compare(ll_chars, start, length):
             # collision! hopefully rare
@@ -677,8 +687,8 @@
 
 
 class StringCacheEntry(object):
-    """ A cache entry, bundling the encoded version of a string, and its wrapped
-    decoded variant. """
+    """ A cache entry, bundling the encoded version of a string as it appears
+    in the input string, and its wrapped decoded variant. """
     def __init__(self, repr, w_uni):
         # repr is the escaped string
         self.repr = repr
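The point of storing the encoded repr next to the wrapped string (which is
what the removed review comment was getting at) is that a cache hit can be
confirmed with a plain byte comparison against the input buffer, without
unescaping or copying the candidate string again. compare() is not shown in
this hunk; roughly, the check amounts to this sketch:

    def compare(self, ll_chars, start, length):
        # sketch: the entry matches only if the raw input bytes are identical
        # to the encoded repr stored when the entry was created
        if length != len(self.repr):
            return False
        for i in range(length):
            if ll_chars[start + i] != self.repr[i]:
                return False
        return True
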
@@ -725,7 +735,7 @@
     # preliminary maps. When we have too many fringe maps, we remove the least
     # commonly instantiated fringe map and mark it as blocked.
 
-    # allowed graph edges or nodes in all_next:
+    # allowed graph edges or nodes in nextmap_all:
     #    USEFUL -------
     #   /      \       \
     #  v        v       v
@@ -747,7 +757,7 @@
     #    v  v
     #   BLOCKED
 
-    # the single_nextmap edge can only be these graph edges:
+    # the nextmap_first edge can only be these graph edges:
     #  USEFUL
     #   |
     #   v
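The fringe cleanup mentioned at the top of this comment block ("when we have
too many fringe maps, we remove the least commonly instantiated fringe map and
mark it as blocked") is not itself changed here. A rough sketch of that
policy, with an invented limit and simplified bookkeeping compared to the real
cleanup_fringe:

    MAX_FRINGE = 40  # hypothetical limit, not taken from the source

    def cleanup_fringe(self):
        # sketch: once too many preliminary maps sit on the fringe, block the
        # one that was instantiated least often; mark_blocked also removes it
        # from the fringe
        if len(self.current_fringe) <= MAX_FRINGE:
            return
        least_used = None
        for m in self.current_fringe:
            if least_used is None or m.instantiation_count < least_used.instantiation_count:
                least_used = m
        least_used.mark_blocked(self)
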
@@ -773,14 +783,12 @@
     def __init__(self, space):
         self.space = space
 
-        # <antocuni> what about calling this "first_nextmap"? Or, even better:
-        # nextmap_first and nextmap_all </antocuni>
-        # a single transition is stored in .single_nextmap
-        self.single_nextmap = None
+        # a single transition is stored in .nextmap_first
+        self.nextmap_first = None
 
-        # all_next is only initialized after seeing the *second* transition
-        # but then it also contains .single_nextmap
-        self.all_next = None # later dict {key: nextmap}
+        # nextmap_all is only initialized after seeing the *second* transition
+        # but then it also contains .nextmap_first
+        self.nextmap_all = None # later dict {key: nextmap}
 
         # keep some statistics about every map: how often it was instantiated
         # and how many non-blocked leaves the map transition tree has, starting
@@ -789,42 +797,42 @@
         self.number_of_leaves = 1
 
     def _check_invariants(self):
-        if self.all_next:
-            for next in self.all_next.itervalues():
+        if self.nextmap_all:
+            for next in self.nextmap_all.itervalues():
                 next._check_invariants()
-        elif self.single_nextmap:
-            self.single_nextmap._check_invariants()
+        elif self.nextmap_first:
+            self.nextmap_first._check_invariants()
 
     def get_next(self, w_key, string, start, stop, terminator):
         from pypy.objspace.std.dictmultiobject import unicode_hash, unicode_eq
         if isinstance(self, JSONMap):
             assert not self.state == MapBase.BLOCKED
-        single_nextmap = self.single_nextmap
-        if (single_nextmap is not None and
-                single_nextmap.w_key.eq_w(w_key)):
-            return single_nextmap
+        nextmap_first = self.nextmap_first
+        if (nextmap_first is not None and
+                nextmap_first.w_key.eq_w(w_key)):
+            return nextmap_first
 
         assert stop >= 0
         assert start >= 0
 
-        if single_nextmap is None:
-            # first transition ever seen, don't initialize all_next
+        if nextmap_first is None:
+            # first transition ever seen, don't initialize nextmap_all
             next = self._make_next_map(w_key, string[start:stop])
-            self.single_nextmap = next
+            self.nextmap_first = next
         else:
-            if self.all_next is None:
+            if self.nextmap_all is None:
                 # 2nd transition ever seen
-                self.all_next = objectmodel.r_dict(unicode_eq, unicode_hash,
+                self.nextmap_all = objectmodel.r_dict(unicode_eq, unicode_hash,
                   force_non_null=True, simple_hash_eq=True)
-                self.all_next[single_nextmap.w_key] = single_nextmap
+                self.nextmap_all[nextmap_first.w_key] = nextmap_first
             else:
-                next = self.all_next.get(w_key, None)
+                next = self.nextmap_all.get(w_key, None)
                 if next is not None:
                     return next
             # if we are at this point we didn't find the transition yet, so
             # create a new one
             next = self._make_next_map(w_key, string[start:stop])
-            self.all_next[w_key] = next
+            self.nextmap_all[w_key] = next
 
             # one new leaf has been created
             self.change_number_of_leaves(1)
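Concretely, the effect of get_next on the two attributes is what the tests
further down check: after the first key ever seen at a given position, only
nextmap_first is set; a second, different key causes nextmap_all to be created
and to hold both transitions, while nextmap_first keeps pointing at the first
one, so the speculative fast path keeps guessing that key. A condensed,
hypothetical walkthrough using the names from the test file:

    m = Terminator(space)
    m_a = m.get_next(w_a, '"a"', 0, 3, m)   # first transition ever
    assert m.nextmap_first is m_a
    assert m.nextmap_all is None             # not initialized yet

    m_b = m.get_next(w_b, '"b"', 0, 3, m)   # second, different key
    assert m.nextmap_first is m_a            # unchanged
    assert m.nextmap_all[w_a] is m_a         # now holds both transitions
    assert m.nextmap_all[w_b] is m_b
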
@@ -846,13 +854,13 @@
         """ Fast path when parsing the next key: We speculate that we will
         always see a commonly seen next key, and use strcmp (implemented in
         key_repr_cmp) to check whether that is the case. """
-        single_nextmap = self.single_nextmap
-        if single_nextmap:
+        nextmap_first = self.nextmap_first
+        if nextmap_first:
             ll_chars = decoder.ll_chars
-            assert isinstance(single_nextmap, JSONMap)
-            if single_nextmap.key_repr_cmp(ll_chars, position):
-                decoder.pos = position + len(single_nextmap.key_repr)
-                return single_nextmap
+            assert isinstance(nextmap_first, JSONMap)
+            if nextmap_first.key_repr_cmp(ll_chars, position):
+                decoder.pos = position + len(nextmap_first.key_repr)
+                return nextmap_first
         return None
 
     def observe_transition(self, newmap, terminator):
@@ -871,18 +879,18 @@
     def _all_dot(self, output):
         identity = objectmodel.compute_unique_id(self)
         output.append('%s [shape=box%s];' % (identity, self._get_dot_text()))
-        if self.all_next:
-            for w_key, value in self.all_next.items():
+        if self.nextmap_all:
+            for w_key, value in self.nextmap_all.items():
                 assert isinstance(value, JSONMap)
-                if value is self.single_nextmap:
+                if value is self.nextmap_first:
                     color = ", color=blue"
                 else:
                     color = ""
                 output.append('%s -> %s [label="%s"%s];' % (
                    identity, objectmodel.compute_unique_id(value), value.w_key._utf8, color))
                 value._all_dot(output)
-        elif self.single_nextmap is not None:
-            value = self.single_nextmap
+        elif self.nextmap_first is not None:
+            value = self.nextmap_first
             output.append('%s -> %s [label="%s", color=blue];' % (
                identity, objectmodel.compute_unique_id(value), value.w_key._utf8))
             value._all_dot(output)
@@ -1000,8 +1008,8 @@
             assert False, "should be unreachable"
 
         if self.state == MapBase.BLOCKED:
-            assert self.single_nextmap is None
-            assert self.all_next is None
+            assert self.nextmap_first is None
+            assert self.nextmap_all is None
         elif self.state == MapBase.FRINGE:
             assert self in self._get_terminator().current_fringe
 
@@ -1016,19 +1024,19 @@
         if was_fringe:
             terminator.remove_from_fringe(self)
         # find the most commonly instantiated child, store it into
-        # single_nextmap and mark it useful, recursively
-        maxchild = self.single_nextmap
-        if self.all_next is not None:
-            for child in self.all_next.itervalues():
+        # nextmap_first and mark it useful, recursively
+        maxchild = self.nextmap_first
+        if self.nextmap_all is not None:
+            for child in self.nextmap_all.itervalues():
                 if child.instantiation_count > maxchild.instantiation_count:
                     maxchild = child
         if maxchild is not None:
             maxchild.mark_useful(terminator)
-            if self.all_next:
-                for child in self.all_next.itervalues():
+            if self.nextmap_all:
+                for child in self.nextmap_all.itervalues():
                     if child is not maxchild:
                         terminator.register_potential_fringe(child)
-                self.single_nextmap = maxchild
+                self.nextmap_first = maxchild
 
     def mark_blocked(self, terminator):
         """ mark self and recursively all its children as blocked."""
@@ -1036,13 +1044,13 @@
         self.state = MapBase.BLOCKED
         if was_fringe:
             terminator.remove_from_fringe(self)
-        if self.all_next:
-            for next in self.all_next.itervalues():
+        if self.nextmap_all:
+            for next in self.nextmap_all.itervalues():
                 next.mark_blocked(terminator)
-        elif self.single_nextmap:
-            self.single_nextmap.mark_blocked(terminator)
-        self.single_nextmap = None
-        self.all_next = None
+        elif self.nextmap_first:
+            self.nextmap_first.mark_blocked(terminator)
+        self.nextmap_first = None
+        self.nextmap_all = None
         self.change_number_of_leaves(-self.number_of_leaves + 1)
 
     def is_state_blocked(self):
@@ -1123,10 +1131,10 @@
     # _____________________________________________________
 
     def _get_dot_text(self):
-        if self.all_next is None:
-            l = int(self.single_nextmap is not None)
+        if self.nextmap_all is None:
+            l = int(self.nextmap_first is not None)
         else:
-            l = len(self.all_next)
+            l = len(self.nextmap_all)
         extra = ""
         if self.decoded_strings:
            extra = "\\n%s/%s (%s%%)" % (self.cache_hits, self.decoded_strings, self.cache_hits/float(self.decoded_strings))
diff --git a/pypy/module/_pypyjson/test/test__pypyjson.py b/pypy/module/_pypyjson/test/test__pypyjson.py
--- a/pypy/module/_pypyjson/test/test__pypyjson.py
+++ b/pypy/module/_pypyjson/test/test__pypyjson.py
@@ -21,26 +21,26 @@
         w_c = self.space.newutf8("c", 1)
         m1 = m.get_next(w_a, '"a"', 0, 3, m)
         assert m1.w_key == w_a
-        assert m1.single_nextmap is None
+        assert m1.nextmap_first is None
         assert m1.key_repr == '"a"'
         assert m1.key_repr_cmp('"a": 123', 0)
         assert not m1.key_repr_cmp('b": 123', 0)
-        assert m.single_nextmap.w_key == w_a
+        assert m.nextmap_first.w_key == w_a
 
         m2 = m.get_next(w_a, '"a"', 0, 3, m)
         assert m2 is m1
 
         m3 = m.get_next(w_b, '"b"', 0, 3, m)
         assert m3.w_key == w_b
-        assert m3.single_nextmap is None
+        assert m3.nextmap_first is None
         assert m3.key_repr == '"b"'
-        assert m.single_nextmap is m1
+        assert m.nextmap_first is m1
 
         m4 = m3.get_next(w_c, '"c"', 0, 3, m)
         assert m4.w_key == w_c
-        assert m4.single_nextmap is None
+        assert m4.nextmap_first is None
         assert m4.key_repr == '"c"'
-        assert m3.single_nextmap is m4
+        assert m3.nextmap_first is m4
 
     def test_json_map_get_index(self):
         m = Terminator(self.space)
@@ -121,7 +121,7 @@
 
         assert m3.state == MapBase.PRELIMINARY
         m3.instantiation_count = 2
-        assert m2.single_nextmap is m3
+        assert m2.nextmap_first is m3
 
         assert m4.state == MapBase.PRELIMINARY
         m4.instantiation_count = 4
@@ -131,7 +131,7 @@
         assert m2.state == MapBase.USEFUL
         assert m3.state == MapBase.FRINGE
         assert m4.state == MapBase.USEFUL
-        assert m2.single_nextmap is m4
+        assert m2.nextmap_first is m4
 
         assert m1.number_of_leaves == 2
         base._check_invariants()
@@ -194,11 +194,11 @@
         base.cleanup_fringe()
         assert base.current_fringe == dict.fromkeys([m1, m2, m3])
         assert m4.state == MapBase.BLOCKED
-        assert m4.single_nextmap is None
-        assert m4.all_next is None
+        assert m4.nextmap_first is None
+        assert m4.nextmap_all is None
         assert m5.state == MapBase.BLOCKED
-        assert m5.single_nextmap is None
-        assert m5.all_next is None
+        assert m5.nextmap_first is None
+        assert m5.nextmap_all is None
 
     def test_deal_with_blocked(self):
         w_a = self.space.newutf8("a", 1)