Author: Carl Friedrich Bolz-Tereick <cfb...@gmx.de> Branch: Changeset: r93710:7add90a647dc Date: 2018-01-29 10:43 +0100 http://bitbucket.org/pypy/pypy/changeset/7add90a647dc/
Log: merge mapdict-size-limit stop using mapdict after 80 attributes to prevent linear lookups on attributes when using getattr and settatr to make gigantic instances. instead, switch to using a (string) dictionary. This fixes issue #2728 diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -14,3 +14,12 @@ .. branch: cpyext-datetime2 Support ``tzinfo`` field on C-API datetime objects, fixes latest pandas HEAD + + +.. branch: mapdict-size-limit + +Fix a corner case of mapdict: When an instance is used like a dict (using +``setattr`` and ``getattr``, or ``.__dict__``) and a lot of attributes are +added, then the performance using mapdict is linear in the number of +attributes. This is now fixed (by switching to a regular dict after 80 +attributes). diff --git a/pypy/objspace/std/mapdict.py b/pypy/objspace/std/mapdict.py --- a/pypy/objspace/std/mapdict.py +++ b/pypy/objspace/std/mapdict.py @@ -7,7 +7,7 @@ from pypy.objspace.std.dictmultiobject import ( W_DictMultiObject, DictStrategy, ObjectDictStrategy, BaseKeyIterator, BaseValueIterator, BaseItemIterator, _never_equal_to_string, - W_DictObject, + W_DictObject, BytesDictStrategy, UnicodeDictStrategy ) from pypy.objspace.std.typeobject import MutableCell @@ -25,6 +25,10 @@ # note: we use "x * NUM_DIGITS_POW2" instead of "x << NUM_DIGITS" because # we want to propagate knowledge that the result cannot be negative +# the maximum number of attributes stored in mapdict (afterwards just use a +# dict) +LIMIT_MAP_ATTRIBUTES = 80 + class AbstractAttribute(object): _immutable_fields_ = ['terminator'] @@ -252,6 +256,9 @@ def materialize_r_dict(self, space, obj, dict_w): raise NotImplementedError("abstract base class") + def materialize_str_dict(self, space, obj, str_dict): + raise NotImplementedError("abstract base class") + def remove_dict_entries(self, obj): raise NotImplementedError("abstract base class") @@ -271,6 +278,13 @@ def _write_terminator(self, obj, name, index, w_value): obj._get_mapdict_map().add_attr(obj, name, index, w_value) + if index == DICT and obj._get_mapdict_map().length() >= LIMIT_MAP_ATTRIBUTES: + space = self.space + w_dict = obj.getdict(space) + assert isinstance(w_dict, W_DictMultiObject) + strategy = w_dict.get_strategy() + assert isinstance(strategy, MapDictStrategy) + strategy.switch_to_text_strategy(w_dict) return True def copy(self, obj): @@ -301,6 +315,12 @@ self.devolved_dict_terminator = DevolvedDictTerminator(space, w_cls) def materialize_r_dict(self, space, obj, dict_w): + return self._make_devolved(space) + + def materialize_str_dict(self, space, obj, dict_w): + return self._make_devolved(space) + + def _make_devolved(self, space): result = Object() result.space = space result._mapdict_init_empty(self.devolved_dict_terminator) @@ -407,6 +427,14 @@ self._copy_attr(obj, new_obj) return new_obj + def materialize_str_dict(self, space, obj, str_dict): + new_obj = self.back.materialize_str_dict(space, obj, str_dict) + if self.index == DICT: + str_dict[self.name] = obj._mapdict_read_storage(self.storageindex) + else: + self._copy_attr(obj, new_obj) + return new_obj + def remove_dict_entries(self, obj): new_obj = self.back.remove_dict_entries(obj) if self.index != DICT: @@ -738,6 +766,15 @@ assert w_obj.getdict(self.space) is w_dict or w_obj._get_mapdict_map().terminator.w_cls is None materialize_r_dict(self.space, w_obj, dict_w) + def switch_to_text_strategy(self, w_dict): + w_obj = self.unerase(w_dict.dstorage) + strategy = self.space.fromcache(BytesDictStrategy) + str_dict = strategy.unerase(strategy.get_empty_storage()) + w_dict.set_strategy(strategy) + w_dict.dstorage = strategy.erase(str_dict) + assert w_obj.getdict(self.space) is w_dict or w_obj._get_mapdict_map().terminator.w_cls is None + materialize_str_dict(self.space, w_obj, str_dict) + def getitem(self, w_dict, w_key): space = self.space w_lookup_type = space.type(w_key) @@ -833,6 +870,11 @@ new_obj = map.materialize_r_dict(space, obj, dict_w) obj._set_mapdict_storage_and_map(new_obj.storage, new_obj.map) +def materialize_str_dict(space, obj, dict_w): + map = obj._get_mapdict_map() + new_obj = map.materialize_str_dict(space, obj, dict_w) + obj._set_mapdict_storage_and_map(new_obj.storage, new_obj.map) + class IteratorMixin(object): diff --git a/pypy/objspace/std/test/test_mapdict.py b/pypy/objspace/std/test/test_mapdict.py --- a/pypy/objspace/std/test/test_mapdict.py +++ b/pypy/objspace/std/test/test_mapdict.py @@ -111,6 +111,33 @@ assert obj2.getdictvalue(space, "b") == 60 assert obj2.map is obj.map +def test_add_attribute_limit(): + for numslots in [0, 10, 100]: + cls = Class() + obj = cls.instantiate() + for i in range(numslots): + obj.setslotvalue(i, i) # some extra slots too, sometimes + # test that eventually attributes are really just stored in a dictionary + for i in range(1000): + obj.setdictvalue(space, str(i), i) + # moved to dict (which is the remaining non-slot item) + assert len(obj.storage) == 1 + numslots + + for i in range(1000): + assert obj.getdictvalue(space, str(i)) == i + for i in range(numslots): + assert obj.getslotvalue(i) == i # check extra slots + + # this doesn't happen with slots + cls = Class() + obj = cls.instantiate() + for i in range(1000): + obj.setslotvalue(i, i) + assert len(obj.storage) == 1000 + + for i in range(1000): + assert obj.getslotvalue(i) == i + def test_insert_different_orders(): cls = Class() obj = cls.instantiate() _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit