Author: Carl Friedrich Bolz <cfb...@gmx.de> Branch: Changeset: r82386:572984f4ec26 Date: 2016-02-22 11:53 +0100 http://bitbucket.org/pypy/pypy/changeset/572984f4ec26/
Log: merge reorder-map-attributes: when creating an instance with attributes in a different order than some previously existing ordering, switch to that ordering. this reduces the number of bridges in some situations. diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -128,6 +128,7 @@ Fix SSL tests by importing cpython's patch + .. branch: remove-getfield-pure Remove pure variants of ``getfield_gc_*`` operations from the JIT. Relevant @@ -163,3 +164,10 @@ .. branch: windows-vmprof-support vmprof should work on Windows. + + +.. branch: reorder-map-attributes + +When creating instances and adding attributes in several different orders +depending on some condition, the JIT would create too much code. This is now +fixed. \ No newline at end of file diff --git a/pypy/objspace/std/mapdict.py b/pypy/objspace/std/mapdict.py --- a/pypy/objspace/std/mapdict.py +++ b/pypy/objspace/std/mapdict.py @@ -1,4 +1,4 @@ -import weakref +import weakref, sys from rpython.rlib import jit, objectmodel, debug, rerased from rpython.rlib.rarithmetic import intmask, r_uint @@ -12,6 +12,11 @@ from pypy.objspace.std.typeobject import MutableCell +erase_item, unerase_item = rerased.new_erasing_pair("mapdict storage item") +erase_map, unerase_map = rerased.new_erasing_pair("map") +erase_list, unerase_list = rerased.new_erasing_pair("mapdict storage list") + + # ____________________________________________________________ # attribute shapes @@ -20,6 +25,7 @@ # note: we use "x * NUM_DIGITS_POW2" instead of "x << NUM_DIGITS" because # we want to propagate knowledge that the result cannot be negative + class AbstractAttribute(object): _immutable_fields_ = ['terminator'] cache_attrs = None @@ -151,29 +157,124 @@ cache[name, index] = attr return attr + @jit.elidable + def _get_cache_attr(self, name, index): + key = name, index + # this method is not actually elidable, but it's fine anyway + if self.cache_attrs is not None: + return self.cache_attrs.get(key, None) + return None + + def add_attr(self, obj, name, index, w_value): + self._reorder_and_add(obj, name, index, w_value) + if not jit.we_are_jitted(): + oldattr = self + attr = obj._get_mapdict_map() + size_est = (oldattr._size_estimate + attr.size_estimate() + - oldattr.size_estimate()) + assert size_est >= (oldattr.length() * NUM_DIGITS_POW2) + oldattr._size_estimate = size_est + + def _add_attr_without_reordering(self, obj, name, index, w_value): + attr = self._get_new_attr(name, index) + attr._switch_map_and_write_storage(obj, w_value) + + @jit.unroll_safe + def _switch_map_and_write_storage(self, obj, w_value): + if self.length() > obj._mapdict_storage_length(): + # note that self.size_estimate() is always at least self.length() + new_storage = [None] * self.size_estimate() + for i in range(obj._mapdict_storage_length()): + new_storage[i] = obj._mapdict_read_storage(i) + obj._set_mapdict_storage_and_map(new_storage, self) + + # the order is important here: first change the map, then the storage, + # for the benefit of the special subclasses + obj._set_mapdict_map(self) + obj._mapdict_write_storage(self.storageindex, w_value) + + + @jit.elidable + def _find_branch_to_move_into(self, name, index): + # walk up the map chain to find an ancestor with lower order that + # already has the current name as a child inserted + current_order = sys.maxint + number_to_readd = 0 + current = self + key = (name, index) + while True: + attr = None + if current.cache_attrs is not None: + attr = current.cache_attrs.get(key, None) + if attr is None or attr.order > current_order: + # we reached the top, so we didn't find it anywhere, + # just add it to the top attribute + if not isinstance(current, PlainAttribute): + return 0, self._get_new_attr(name, index) + + else: + return number_to_readd, attr + # if not found try parent + number_to_readd += 1 + current_order = current.order + current = current.back + @jit.look_inside_iff(lambda self, obj, name, index, w_value: jit.isconstant(self) and jit.isconstant(name) and jit.isconstant(index)) - def add_attr(self, obj, name, index, w_value): - attr = self._get_new_attr(name, index) - oldattr = obj._get_mapdict_map() - if not jit.we_are_jitted(): - size_est = (oldattr._size_estimate + attr.size_estimate() - - oldattr.size_estimate()) - assert size_est >= (oldattr.length() * NUM_DIGITS_POW2) - oldattr._size_estimate = size_est - if attr.length() > obj._mapdict_storage_length(): - # note that attr.size_estimate() is always at least attr.length() - new_storage = [None] * attr.size_estimate() - for i in range(obj._mapdict_storage_length()): - new_storage[i] = obj._mapdict_read_storage(i) - obj._set_mapdict_storage_and_map(new_storage, attr) + def _reorder_and_add(self, obj, name, index, w_value): + # the idea is as follows: the subtrees of any map are ordered by + # insertion. the invariant is that subtrees that are inserted later + # must not contain the name of the attribute of any earlier inserted + # attribute anywhere + # m______ + # inserted first / \ ... \ further attributes + # attrname a 0/ 1\ n\ + # m a must not appear here anywhere + # + # when inserting a new attribute in an object we check whether any + # parent of lower order has seen that attribute yet. if yes, we follow + # that branch. if not, we normally append that attribute. When we + # follow a prior branch, we necessarily remove some attributes to be + # able to do that. They need to be re-added, which has to follow the + # reordering procedure recusively. - # the order is important here: first change the map, then the storage, - # for the benefit of the special subclasses - obj._set_mapdict_map(attr) - obj._mapdict_write_storage(attr.storageindex, w_value) + # we store the to-be-readded attribute in the stack, with the map and + # the value paired up those are lazily initialized to a list large + # enough to store all current attributes + stack = None + stack_index = 0 + while True: + current = self + number_to_readd = 0 + number_to_readd, attr = self._find_branch_to_move_into(name, index) + # we found the attributes further up, need to save the + # previous values of the attributes we passed + if number_to_readd: + if stack is None: + stack = [erase_map(None)] * (self.length() * 2) + current = self + for i in range(number_to_readd): + assert isinstance(current, PlainAttribute) + w_self_value = obj._mapdict_read_storage( + current.storageindex) + stack[stack_index] = erase_map(current) + stack[stack_index + 1] = erase_item(w_self_value) + stack_index += 2 + current = current.back + attr._switch_map_and_write_storage(obj, w_value) + + if not stack_index: + return + + # readd the current top of the stack + stack_index -= 2 + next_map = unerase_map(stack[stack_index]) + w_value = unerase_item(stack[stack_index + 1]) + name = next_map.name + index = next_map.index + self = obj._get_mapdict_map() def materialize_r_dict(self, space, obj, dict_w): raise NotImplementedError("abstract base class") @@ -279,7 +380,7 @@ return Terminator.set_terminator(self, obj, terminator) class PlainAttribute(AbstractAttribute): - _immutable_fields_ = ['name', 'index', 'storageindex', 'back', 'ever_mutated?'] + _immutable_fields_ = ['name', 'index', 'storageindex', 'back', 'ever_mutated?', 'order'] def __init__(self, name, index, back): AbstractAttribute.__init__(self, back.space, back.terminator) @@ -289,6 +390,7 @@ self.back = back self._size_estimate = self.length() * NUM_DIGITS_POW2 self.ever_mutated = False + self.order = len(back.cache_attrs) if back.cache_attrs else 0 def _copy_attr(self, obj, new_obj): w_value = self.read(obj, self.name, self.index) @@ -542,9 +644,6 @@ memo_get_subclass_of_correct_size._annspecialcase_ = "specialize:memo" _subclass_cache = {} -erase_item, unerase_item = rerased.new_erasing_pair("mapdict storage item") -erase_list, unerase_list = rerased.new_erasing_pair("mapdict storage list") - def _make_subclass_size_n(supercls, n): from rpython.rlib import unroll rangen = unroll.unrolling_iterable(range(n)) diff --git a/pypy/objspace/std/test/test_mapdict.py b/pypy/objspace/std/test/test_mapdict.py --- a/pypy/objspace/std/test/test_mapdict.py +++ b/pypy/objspace/std/test/test_mapdict.py @@ -107,6 +107,153 @@ assert obj2.getdictvalue(space, "b") == 60 assert obj2.map is obj.map +def test_insert_different_orders(): + cls = Class() + obj = cls.instantiate() + obj.setdictvalue(space, "a", 10) + obj.setdictvalue(space, "b", 20) + + obj2 = cls.instantiate() + obj2.setdictvalue(space, "b", 30) + obj2.setdictvalue(space, "a", 40) + + assert obj.map is obj2.map + +def test_insert_different_orders_2(): + cls = Class() + obj = cls.instantiate() + obj2 = cls.instantiate() + + obj.setdictvalue(space, "a", 10) + + obj2.setdictvalue(space, "b", 20) + obj2.setdictvalue(space, "a", 30) + + obj.setdictvalue(space, "b", 40) + assert obj.map is obj2.map + +def test_insert_different_orders_3(): + cls = Class() + obj = cls.instantiate() + obj2 = cls.instantiate() + obj3 = cls.instantiate() + obj4 = cls.instantiate() + obj5 = cls.instantiate() + obj6 = cls.instantiate() + + obj.setdictvalue(space, "a", 10) + obj.setdictvalue(space, "b", 20) + obj.setdictvalue(space, "c", 30) + + obj2.setdictvalue(space, "a", 30) + obj2.setdictvalue(space, "c", 40) + obj2.setdictvalue(space, "b", 50) + + obj3.setdictvalue(space, "c", 30) + obj3.setdictvalue(space, "a", 40) + obj3.setdictvalue(space, "b", 50) + + obj4.setdictvalue(space, "c", 30) + obj4.setdictvalue(space, "b", 40) + obj4.setdictvalue(space, "a", 50) + + obj5.setdictvalue(space, "b", 30) + obj5.setdictvalue(space, "a", 40) + obj5.setdictvalue(space, "c", 50) + + obj6.setdictvalue(space, "b", 30) + obj6.setdictvalue(space, "c", 40) + obj6.setdictvalue(space, "a", 50) + + assert obj.map is obj2.map + assert obj.map is obj3.map + assert obj.map is obj4.map + assert obj.map is obj5.map + assert obj.map is obj6.map + + +def test_insert_different_orders_4(): + cls = Class() + obj = cls.instantiate() + obj2 = cls.instantiate() + + obj.setdictvalue(space, "a", 10) + obj.setdictvalue(space, "b", 20) + obj.setdictvalue(space, "c", 30) + obj.setdictvalue(space, "d", 40) + + obj2.setdictvalue(space, "d", 50) + obj2.setdictvalue(space, "c", 50) + obj2.setdictvalue(space, "b", 50) + obj2.setdictvalue(space, "a", 50) + + assert obj.map is obj2.map + +def test_insert_different_orders_5(): + cls = Class() + obj = cls.instantiate() + obj2 = cls.instantiate() + + obj.setdictvalue(space, "a", 10) + obj.setdictvalue(space, "b", 20) + obj.setdictvalue(space, "c", 30) + obj.setdictvalue(space, "d", 40) + + obj2.setdictvalue(space, "d", 50) + obj2.setdictvalue(space, "c", 50) + obj2.setdictvalue(space, "b", 50) + obj2.setdictvalue(space, "a", 50) + + obj3 = cls.instantiate() + obj3.setdictvalue(space, "d", 50) + obj3.setdictvalue(space, "c", 50) + obj3.setdictvalue(space, "b", 50) + obj3.setdictvalue(space, "a", 50) + + assert obj.map is obj3.map + + +def test_bug_stack_overflow_insert_attributes(): + cls = Class() + obj = cls.instantiate() + + for i in range(1000): + obj.setdictvalue(space, str(i), i) + + +def test_insert_different_orders_perm(): + from itertools import permutations + cls = Class() + seen_maps = {} + for preexisting in ['', 'x', 'xy']: + for i, attributes in enumerate(permutations("abcdef")): + obj = cls.instantiate() + for i, attr in enumerate(preexisting): + obj.setdictvalue(space, attr, i*1000) + key = preexisting + for j, attr in enumerate(attributes): + obj.setdictvalue(space, attr, i*10+j) + key = "".join(sorted(key+attr)) + if key in seen_maps: + assert obj.map is seen_maps[key] + else: + seen_maps[key] = obj.map + + print len(seen_maps) + + +def test_bug_infinite_loop(): + cls = Class() + obj = cls.instantiate() + obj.setdictvalue(space, "e", 1) + obj2 = cls.instantiate() + obj2.setdictvalue(space, "f", 2) + obj3 = cls.instantiate() + obj3.setdictvalue(space, "a", 3) + obj3.setdictvalue(space, "e", 4) + obj3.setdictvalue(space, "f", 5) + + def test_attr_immutability(monkeypatch): cls = Class() obj = cls.instantiate() @@ -359,9 +506,15 @@ class TestMapDictImplementation(BaseTestRDictImplementation): StrategyClass = MapDictStrategy get_impl = get_impl + def test_setdefault_fast(self): + # mapdict can't pass this, which is fine + pass class TestDevolvedMapDictImplementation(BaseTestDevolvedDictImplementation): get_impl = get_impl StrategyClass = MapDictStrategy + def test_setdefault_fast(self): + # mapdict can't pass this, which is fine + pass # ___________________________________________________________ # tests that check the obj interface after the dict has devolved @@ -1132,3 +1285,7 @@ class TestMapDictImplementationUsingnewdict(BaseTestRDictImplementation): StrategyClass = MapDictStrategy # NB: the get_impl method is not overwritten here, as opposed to above + + def test_setdefault_fast(self): + # mapdict can't pass this, which is fine + pass _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit