Author: Carl Friedrich Bolz-Tereick <cfb...@gmx.de>
Branch: 
Changeset: r93710:7add90a647dc
Date: 2018-01-29 10:43 +0100
http://bitbucket.org/pypy/pypy/changeset/7add90a647dc/

Log:    merge mapdict-size-limit

        stop using mapdict after 80 attributes to prevent linear lookups on
        attributes when using getattr and settatr to make gigantic
        instances. instead, switch to using a (string) dictionary. This
        fixes issue #2728

diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst
--- a/pypy/doc/whatsnew-head.rst
+++ b/pypy/doc/whatsnew-head.rst
@@ -14,3 +14,12 @@
 .. branch: cpyext-datetime2
 
 Support ``tzinfo`` field on C-API datetime objects, fixes latest pandas HEAD
+
+
+.. branch: mapdict-size-limit
+
+Fix a corner case of mapdict: When an instance is used like a dict (using
+``setattr`` and ``getattr``, or ``.__dict__``) and a lot of attributes are
+added, then the performance using mapdict is linear in the number of
+attributes. This is now fixed (by switching to a regular dict after 80
+attributes).
diff --git a/pypy/objspace/std/mapdict.py b/pypy/objspace/std/mapdict.py
--- a/pypy/objspace/std/mapdict.py
+++ b/pypy/objspace/std/mapdict.py
@@ -7,7 +7,7 @@
 from pypy.objspace.std.dictmultiobject import (
     W_DictMultiObject, DictStrategy, ObjectDictStrategy, BaseKeyIterator,
     BaseValueIterator, BaseItemIterator, _never_equal_to_string,
-    W_DictObject,
+    W_DictObject, BytesDictStrategy, UnicodeDictStrategy
 )
 from pypy.objspace.std.typeobject import MutableCell
 
@@ -25,6 +25,10 @@
 # note: we use "x * NUM_DIGITS_POW2" instead of "x << NUM_DIGITS" because
 # we want to propagate knowledge that the result cannot be negative
 
+# the maximum number of attributes stored in mapdict (afterwards just use a
+# dict)
+LIMIT_MAP_ATTRIBUTES = 80
+
 
 class AbstractAttribute(object):
     _immutable_fields_ = ['terminator']
@@ -252,6 +256,9 @@
     def materialize_r_dict(self, space, obj, dict_w):
         raise NotImplementedError("abstract base class")
 
+    def materialize_str_dict(self, space, obj, str_dict):
+        raise NotImplementedError("abstract base class")
+
     def remove_dict_entries(self, obj):
         raise NotImplementedError("abstract base class")
 
@@ -271,6 +278,13 @@
 
     def _write_terminator(self, obj, name, index, w_value):
         obj._get_mapdict_map().add_attr(obj, name, index, w_value)
+        if index == DICT and obj._get_mapdict_map().length() >= 
LIMIT_MAP_ATTRIBUTES:
+            space = self.space
+            w_dict = obj.getdict(space)
+            assert isinstance(w_dict, W_DictMultiObject)
+            strategy = w_dict.get_strategy()
+            assert isinstance(strategy, MapDictStrategy)
+            strategy.switch_to_text_strategy(w_dict)
         return True
 
     def copy(self, obj):
@@ -301,6 +315,12 @@
         self.devolved_dict_terminator = DevolvedDictTerminator(space, w_cls)
 
     def materialize_r_dict(self, space, obj, dict_w):
+        return self._make_devolved(space)
+
+    def materialize_str_dict(self, space, obj, dict_w):
+        return self._make_devolved(space)
+
+    def _make_devolved(self, space):
         result = Object()
         result.space = space
         result._mapdict_init_empty(self.devolved_dict_terminator)
@@ -407,6 +427,14 @@
             self._copy_attr(obj, new_obj)
         return new_obj
 
+    def materialize_str_dict(self, space, obj, str_dict):
+        new_obj = self.back.materialize_str_dict(space, obj, str_dict)
+        if self.index == DICT:
+            str_dict[self.name] = obj._mapdict_read_storage(self.storageindex)
+        else:
+            self._copy_attr(obj, new_obj)
+        return new_obj
+
     def remove_dict_entries(self, obj):
         new_obj = self.back.remove_dict_entries(obj)
         if self.index != DICT:
@@ -738,6 +766,15 @@
         assert w_obj.getdict(self.space) is w_dict or 
w_obj._get_mapdict_map().terminator.w_cls is None
         materialize_r_dict(self.space, w_obj, dict_w)
 
+    def switch_to_text_strategy(self, w_dict):
+        w_obj = self.unerase(w_dict.dstorage)
+        strategy = self.space.fromcache(BytesDictStrategy)
+        str_dict = strategy.unerase(strategy.get_empty_storage())
+        w_dict.set_strategy(strategy)
+        w_dict.dstorage = strategy.erase(str_dict)
+        assert w_obj.getdict(self.space) is w_dict or 
w_obj._get_mapdict_map().terminator.w_cls is None
+        materialize_str_dict(self.space, w_obj, str_dict)
+
     def getitem(self, w_dict, w_key):
         space = self.space
         w_lookup_type = space.type(w_key)
@@ -833,6 +870,11 @@
     new_obj = map.materialize_r_dict(space, obj, dict_w)
     obj._set_mapdict_storage_and_map(new_obj.storage, new_obj.map)
 
+def materialize_str_dict(space, obj, dict_w):
+    map = obj._get_mapdict_map()
+    new_obj = map.materialize_str_dict(space, obj, dict_w)
+    obj._set_mapdict_storage_and_map(new_obj.storage, new_obj.map)
+
 
 class IteratorMixin(object):
 
diff --git a/pypy/objspace/std/test/test_mapdict.py 
b/pypy/objspace/std/test/test_mapdict.py
--- a/pypy/objspace/std/test/test_mapdict.py
+++ b/pypy/objspace/std/test/test_mapdict.py
@@ -111,6 +111,33 @@
     assert obj2.getdictvalue(space, "b") == 60
     assert obj2.map is obj.map
 
+def test_add_attribute_limit():
+    for numslots in [0, 10, 100]:
+        cls = Class()
+        obj = cls.instantiate()
+        for i in range(numslots):
+            obj.setslotvalue(i, i) # some extra slots too, sometimes
+        # test that eventually attributes are really just stored in a 
dictionary
+        for i in range(1000):
+            obj.setdictvalue(space, str(i), i)
+        # moved to dict (which is the remaining non-slot item)
+        assert len(obj.storage) == 1 + numslots
+
+        for i in range(1000):
+            assert obj.getdictvalue(space, str(i)) == i
+        for i in range(numslots):
+            assert obj.getslotvalue(i) == i # check extra slots
+
+    # this doesn't happen with slots
+    cls = Class()
+    obj = cls.instantiate()
+    for i in range(1000):
+        obj.setslotvalue(i, i)
+    assert len(obj.storage) == 1000
+
+    for i in range(1000):
+        assert obj.getslotvalue(i) == i
+
 def test_insert_different_orders():
     cls = Class()
     obj = cls.instantiate()
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to