Author: Carl Friedrich Bolz-Tereick <[email protected]>
Branch: json-decoder-maps
Changeset: r96759:a809240c1f45
Date: 2019-06-05 15:48 +0200
http://bitbucket.org/pypy/pypy/changeset/a809240c1f45/

Log:    add some comments, some cleanups

diff --git a/pypy/module/_pypyjson/interp_decoder.py 
b/pypy/module/_pypyjson/interp_decoder.py
--- a/pypy/module/_pypyjson/interp_decoder.py
+++ b/pypy/module/_pypyjson/interp_decoder.py
@@ -91,6 +91,10 @@
         self.lru_index = 0
 
         self.startmap = self.space.fromcache(Terminator)
+
+        # keep a list of objects that are created with maps that aren't clearly
+        # useful. If they turn out to be useful in the end we are good,
+        # otherwise convert them to dicts (see .close())
         self.unclear_objects = []
 
         self.scratch = [[None] * self.DEFAULT_SIZE_SCRATCH]  # list of scratch 
space
@@ -102,7 +106,7 @@
         # clean up objects that are instances of now blocked maps
         for w_obj in self.unclear_objects:
             jsonmap = self._get_jsonmap_from_dict(w_obj)
-            if jsonmap.is_blocked():
+            if jsonmap.is_state_blocked():
                 self._devolve_jsonmap_dict(w_obj)
 
     def getslice(self, start, end):
@@ -353,21 +357,20 @@
             i += 1
             if ch == '}':
                 self.pos = i
-                if currmap.is_blocked():
+                self.scratch.append(values_w)  # can reuse next time
+                if currmap.is_state_blocked():
                     currmap.instantiation_count += 1
-                    self.scratch.append(values_w)  # can reuse next time
                     dict_w = self._switch_to_dict(currmap, values_w, nextindex)
                     return self._create_dict(dict_w)
-                self.scratch.append(values_w)  # can reuse next time
                 values_w = values_w[:nextindex]
                 currmap.instantiation_count += 1
                 w_res = self._create_dict_map(values_w, currmap)
-                if currmap.state != MapBase.USEFUL:
+                if not currmap.is_state_useful():
                     self.unclear_objects.append(w_res)
                 return w_res
             elif ch == ',':
                 i = self.skip_whitespace(i)
-                if currmap.is_blocked():
+                if currmap.is_state_blocked():
                     currmap.instantiation_count += 1
                     self.scratch.append(values_w)  # can reuse next time
                     dict_w = self._switch_to_dict(currmap, values_w, nextindex)
@@ -660,6 +663,9 @@
 
 
 class MapBase(object):
+    """ A map implementation to speed up parsing of json dicts, and to
+    represent the resulting dicts more compactly and make access faster. """
+
     # the basic problem we are trying to solve is the following: dicts in
     # json can either be used as objects, or as dictionaries with arbitrary
     # string keys. We want to use maps for the former, but not for the
@@ -674,6 +680,16 @@
     # into a "blocked" map, which is a point in the map tree where we will
     # switch to regular dicts, when we reach that part of the tree.
 
+    # One added complication: We want to keep the number of preliminary maps
+    # bounded to prevent generating tons of useless maps. but also not too
+    # small, to support having a json file that contains many uniform objects
+    # with tons of keys. That's where the idea of "fringe" maps comes into
+    # play. They are maps that sit between known useful nodes and preliminary
+    # nodes in the map transition tree. We bound only the number of fringe
+    # nodes we are considering (to MAX_FRINGE), but not the number of
+    # preliminary maps. When we have too many fringe maps, we remove the least
+    # commonly instantiated fringe map and mark it as blocked.
+
     # allowed graph edges or nodes in all_next:
     #    USEFUL -------
     #   /      \       \
@@ -837,15 +853,19 @@
 
 
 class Terminator(MapBase):
+    """ The root node of the map transition tree. """
     def __init__(self, space):
         MapBase.__init__(self, space)
-        self.all_object_count = 0
+        # a set of all map nodes that are currently in the FRINGE state
         self.current_fringe = {}
 
     def register_potential_fringe(self, prelim):
+        """ add prelim to the fringe, if its prev is either a Terminator or
+        useful. """
         prev = prelim.prev
         if (isinstance(prev, Terminator) or
                 isinstance(prev, JSONMap) and prev.state == MapBase.USEFUL):
+            assert prelim.state == MapBase.PRELIMINARY
             prelim.state = MapBase.FRINGE
 
             if len(self.current_fringe) > MapBase.MAX_FRINGE:
@@ -853,10 +873,12 @@
             self.current_fringe[prelim] = None
 
     def remove_from_fringe(self, former_fringe):
+        """ Remove former_fringe from self.current_fringe. """
         assert former_fringe.state in (MapBase.USEFUL, MapBase.BLOCKED)
         del self.current_fringe[former_fringe]
 
     def cleanup_fringe(self):
+        """ remove the least-instantiated fringe map and block it."""
         min_fringe = None
         min_avg = 10000000000
         for f in self.current_fringe:
@@ -868,6 +890,9 @@
         assert min_fringe
         min_fringe.mark_blocked(self)
 
+    def _check_invariants(self):
+        for fringe in self.current_fringe:
+            assert fringe.state == MapBase.FRINGE
 
 class JSONMap(MapBase):
     """ A map implementation to speed up parsing """
@@ -890,8 +915,7 @@
         self.keys_in_order = None
         self.strategy_instance = None
 
-    @jit.elidable
-    def get_terminator(self):
+    def _get_terminator(self): # only for _check_invariants
         while isinstance(self, JSONMap):
             self = self.prev
         assert isinstance(self, Terminator)
@@ -924,6 +948,8 @@
         if self.state == MapBase.BLOCKED:
             assert self.single_nextmap is None
             assert self.all_next is None
+        elif self.state == MapBase.FRINGE:
+            assert self in self._get_terminator().current_fringe
 
         MapBase._check_invariants(self)
 
@@ -962,9 +988,12 @@
         self.all_next = None
         self.change_number_of_leaves(-self.number_of_leaves + 1)
 
-    def is_blocked(self):
+    def is_state_blocked(self):
         return self.state == MapBase.BLOCKED
 
+    def is_state_useful(self):
+        return self.state == MapBase.USEFUL
+
     def average_instantiation(self):
         return self.instantiation_count / float(self.number_of_leaves)
 
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to