Author: Armin Rigo <ar...@tunes.org> Branch: py3.5 Changeset: r89934:5e325b5c62d9 Date: 2017-02-05 08:29 +0100 http://bitbucket.org/pypy/pypy/changeset/5e325b5c62d9/
Log: hg merge dict-move-to-end diff --git a/pypy/doc/stackless.rst b/pypy/doc/stackless.rst --- a/pypy/doc/stackless.rst +++ b/pypy/doc/stackless.rst @@ -190,7 +190,7 @@ from GC issues: if the program "forgets" an unfinished greenlet, it will always be collected at the next garbage collection. -.. _documentation of the greenlets: http://packages.python.org/greenlet/ +.. _documentation of the greenlets: https://greenlet.readthedocs.io/ Unimplemented features diff --git a/rpython/annotator/unaryop.py b/rpython/annotator/unaryop.py --- a/rpython/annotator/unaryop.py +++ b/rpython/annotator/unaryop.py @@ -14,7 +14,7 @@ SomeUnicodeCodePoint, SomeInstance, SomeBuiltin, SomeBuiltinMethod, SomeFloat, SomeIterator, SomePBC, SomeNone, SomeTypeOf, s_ImpossibleValue, s_Bool, s_None, s_Int, unionof, add_knowntypedata, - SomeWeakRef, SomeUnicodeString, SomeByteArray) + SomeWeakRef, SomeUnicodeString, SomeByteArray, SomeOrderedDict) from rpython.annotator.bookkeeper import getbookkeeper, immutablevalue from rpython.annotator.binaryop import _clone ## XXX where to put this? from rpython.annotator.binaryop import _dict_can_only_throw_keyerror @@ -575,6 +575,13 @@ pair(self, s_key).delitem() method_delitem_with_hash.can_only_throw = _dict_can_only_throw_keyerror +class __extend__(SomeOrderedDict): + + def method_move_to_end(self, s_key, s_last): + assert s_Bool.contains(s_last) + pair(self, s_key).delitem() + method_move_to_end.can_only_throw = _dict_can_only_throw_keyerror + @op.contains.register(SomeString) @op.contains.register(SomeUnicodeString) def contains_String(annotator, string, char): diff --git a/rpython/rlib/objectmodel.py b/rpython/rlib/objectmodel.py --- a/rpython/rlib/objectmodel.py +++ b/rpython/rlib/objectmodel.py @@ -934,6 +934,24 @@ return d.delitem_with_hash(key, h) +def _untranslated_move_to_end(d, key, last): + "NOT_RPYTHON" + value = d.pop(key) + if last: + d[key] = value + else: + items = d.items() + d.clear() + d[key] = value + d.update(items) + +@specialize.call_location() +def move_to_end(d, key, last=True): + if not we_are_translated(): + _untranslated_move_to_end(d, key, last) + return + d.move_to_end(key, last) + # ____________________________________________________________ def import_from_mixin(M, special_methods=['__init__', '__del__']): diff --git a/rpython/rlib/test/test_objectmodel.py b/rpython/rlib/test/test_objectmodel.py --- a/rpython/rlib/test/test_objectmodel.py +++ b/rpython/rlib/test/test_objectmodel.py @@ -7,7 +7,7 @@ resizelist_hint, is_annotation_constant, always_inline, NOT_CONSTANT, iterkeys_with_hash, iteritems_with_hash, contains_with_hash, setitem_with_hash, getitem_with_hash, delitem_with_hash, import_from_mixin, - fetch_translated_config, try_inline) + fetch_translated_config, try_inline, move_to_end) from rpython.translator.translator import TranslationContext, graphof from rpython.rtyper.test.tool import BaseRtypingTest from rpython.rtyper.test.test_llinterp import interpret @@ -679,6 +679,16 @@ assert f(29) == 0 interpret(f, [27]) +def test_rordereddict_move_to_end(): + d = OrderedDict() + d['key1'] = 'val1' + d['key2'] = 'val2' + d['key3'] = 'val3' + move_to_end(d, 'key1') + assert d.items() == [('key2', 'val2'), ('key3', 'val3'), ('key1', 'val1')] + move_to_end(d, 'key1', last=False) + assert d.items() == [('key1', 'val1'), ('key2', 'val2'), ('key3', 'val3')] + def test_import_from_mixin(): class M: # old-style def f(self): diff --git a/rpython/rtyper/lltypesystem/rordereddict.py b/rpython/rtyper/lltypesystem/rordereddict.py --- a/rpython/rtyper/lltypesystem/rordereddict.py +++ b/rpython/rtyper/lltypesystem/rordereddict.py @@ -407,6 +407,15 @@ hop.exception_is_here() hop.gendirectcall(ll_dict_delitem_with_hash, v_dict, v_key, v_hash) + def rtype_method_move_to_end(self, hop): + v_dict, v_key, v_last = hop.inputargs( + self, self.key_repr, lltype.Bool) + if not self.custom_eq_hash: + hop.has_implicit_exception(KeyError) # record that we know about it + hop.exception_is_here() + hop.gendirectcall(ll_dict_move_to_end, v_dict, v_key, v_last) + + class __extend__(pairtype(OrderedDictRepr, rmodel.Repr)): def rtype_getitem((r_dict, r_key), hop): @@ -542,16 +551,18 @@ ll_assert(False, "ll_call_insert_clean_function(): invalid lookup_fun") assert False -def ll_call_delete_by_entry_index(d, hash, i): +def ll_call_delete_by_entry_index(d, hash, i, replace_with): + # only called from _ll_dict_del, whose @jit.look_inside_iff + # condition should control when we get inside here with the jit fun = d.lookup_function_no & FUNC_MASK if fun == FUNC_BYTE: - ll_dict_delete_by_entry_index(d, hash, i, TYPE_BYTE) + ll_dict_delete_by_entry_index(d, hash, i, replace_with, TYPE_BYTE) elif fun == FUNC_SHORT: - ll_dict_delete_by_entry_index(d, hash, i, TYPE_SHORT) + ll_dict_delete_by_entry_index(d, hash, i, replace_with, TYPE_SHORT) elif IS_64BIT and fun == FUNC_INT: - ll_dict_delete_by_entry_index(d, hash, i, TYPE_INT) + ll_dict_delete_by_entry_index(d, hash, i, replace_with, TYPE_INT) elif fun == FUNC_LONG: - ll_dict_delete_by_entry_index(d, hash, i, TYPE_LONG) + ll_dict_delete_by_entry_index(d, hash, i, replace_with, TYPE_LONG) else: # can't be still FUNC_MUST_REINDEX here ll_assert(False, "ll_call_delete_by_entry_index(): invalid lookup_fun") @@ -805,13 +816,12 @@ ll_dict_delitem_with_hash(d, key, d.keyhash(key)) def ll_dict_delitem_with_hash(d, key, hash): - index = d.lookup_function(d, key, hash, FLAG_DELETE) + index = d.lookup_function(d, key, hash, FLAG_LOOKUP) if index < 0: raise KeyError - _ll_dict_del(d, index) + _ll_dict_del(d, hash, index) -@jit.look_inside_iff(lambda d, i: jit.isvirtual(d) and jit.isconstant(i)) -def _ll_dict_del(d, index): +def _ll_dict_del_entry(d, index): d.entries.mark_deleted(index) d.num_live_items -= 1 # clear the key and the value if they are GC pointers @@ -823,6 +833,11 @@ if ENTRIES.must_clear_value: entry.value = lltype.nullptr(ENTRY.value.TO) +@jit.look_inside_iff(lambda d, h, i: jit.isvirtual(d) and jit.isconstant(i)) +def _ll_dict_del(d, hash, index): + ll_call_delete_by_entry_index(d, hash, index, DELETED) + _ll_dict_del_entry(d, index) + if d.num_live_items == 0: # Dict is now empty. Reset these fields. d.num_ever_used_items = 0 @@ -963,7 +978,6 @@ FLAG_LOOKUP = 0 FLAG_STORE = 1 -FLAG_DELETE = 2 @specialize.memo() def _ll_ptr_to_array_of(T): @@ -985,8 +999,6 @@ if index >= VALID_OFFSET: checkingkey = entries[index - VALID_OFFSET].key if direct_compare and checkingkey == key: - if store_flag == FLAG_DELETE: - indexes[i] = rffi.cast(T, DELETED) return index - VALID_OFFSET # found the entry if d.keyeq is not None and entries.hash(index - VALID_OFFSET) == hash: # correct hash, maybe the key is e.g. a different pointer to @@ -1000,8 +1012,6 @@ # the compare did major nasty stuff to the dict: start over return ll_dict_lookup(d, key, hash, store_flag, T) if found: - if store_flag == FLAG_DELETE: - indexes[i] = rffi.cast(T, DELETED) return index - VALID_OFFSET deletedslot = -1 elif index == DELETED: @@ -1030,8 +1040,6 @@ elif index >= VALID_OFFSET: checkingkey = entries[index - VALID_OFFSET].key if direct_compare and checkingkey == key: - if store_flag == FLAG_DELETE: - indexes[i] = rffi.cast(T, DELETED) return index - VALID_OFFSET # found the entry if d.keyeq is not None and entries.hash(index - VALID_OFFSET) == hash: # correct hash, maybe the key is e.g. a different pointer to @@ -1044,8 +1052,6 @@ # the compare did major nasty stuff to the dict: start over return ll_dict_lookup(d, key, hash, store_flag, T) if found: - if store_flag == FLAG_DELETE: - indexes[i] = rffi.cast(T, DELETED) return index - VALID_OFFSET elif deletedslot == -1: deletedslot = intmask(i) @@ -1066,7 +1072,11 @@ perturb >>= PERTURB_SHIFT indexes[i] = rffi.cast(T, index + VALID_OFFSET) -def ll_dict_delete_by_entry_index(d, hash, locate_index, T): +# the following function is only called from _ll_dict_del, whose +# @jit.look_inside_iff condition should control when we get inside +# here with the jit +@jit.unroll_safe +def ll_dict_delete_by_entry_index(d, hash, locate_index, replace_with, T): # Another simplified version of ll_dict_lookup() which locates a # hashtable entry with the given 'index' stored in it, and deletes it. # This *should* be safe against evil user-level __eq__/__hash__ @@ -1083,7 +1093,7 @@ i = (i << 2) + i + perturb + 1 i = i & mask perturb >>= PERTURB_SHIFT - indexes[i] = rffi.cast(T, DELETED) + indexes[i] = rffi.cast(T, replace_with) # ____________________________________________________________ # @@ -1253,18 +1263,8 @@ if hasattr(DICT, 'fnkeyhash'): newdict.fnkeyhash = dict.fnkeyhash - i = 0 - while i < newdict.num_ever_used_items: - d_entry = newdict.entries[i] - entry = dict.entries[i] - ENTRY = lltype.typeOf(newdict.entries).TO.OF - d_entry.key = entry.key - if hasattr(ENTRY, 'f_valid'): - d_entry.f_valid = entry.f_valid - d_entry.value = entry.value - if hasattr(ENTRY, 'f_hash'): - d_entry.f_hash = entry.f_hash - i += 1 + rgc.ll_arraycopy(dict.entries, newdict.entries, 0, 0, + newdict.num_ever_used_items) ll_dict_reindex(newdict, _ll_len_of_d_indexes(dict)) return newdict @@ -1390,8 +1390,6 @@ break dic.num_ever_used_items -= 1 - # we must remove the precise entry in the hashtable that points to 'i' - ll_call_delete_by_entry_index(dic, entries.hash(i), i) return i def ll_dict_popitem(ELEM, dic): @@ -1400,21 +1398,139 @@ r = lltype.malloc(ELEM.TO) r.item0 = recast(ELEM.TO.item0, entry.key) r.item1 = recast(ELEM.TO.item1, entry.value) - _ll_dict_del(dic, i) + _ll_dict_del(dic, dic.entries.hash(i), i) return r def ll_dict_pop(dic, key): - index = dic.lookup_function(dic, key, dic.keyhash(key), FLAG_DELETE) + hash = dic.keyhash(key) + index = dic.lookup_function(dic, key, hash, FLAG_LOOKUP) if index < 0: raise KeyError value = dic.entries[index].value - _ll_dict_del(dic, index) + _ll_dict_del(dic, hash, index) return value def ll_dict_pop_default(dic, key, dfl): - index = dic.lookup_function(dic, key, dic.keyhash(key), FLAG_DELETE) + hash = dic.keyhash(key) + index = dic.lookup_function(dic, key, hash, FLAG_LOOKUP) if index < 0: return dfl value = dic.entries[index].value - _ll_dict_del(dic, index) + _ll_dict_del(dic, hash, index) return value + +def ll_dict_move_to_end(d, key, last): + if last: + ll_dict_move_to_last(d, key) + else: + ll_dict_move_to_first(d, key) + +def ll_dict_move_to_last(d, key): + hash = d.keyhash(key) + old_index = d.lookup_function(d, key, hash, FLAG_LOOKUP) + if old_index < 0: + raise KeyError + + if old_index == d.num_ever_used_items - 1: + return + + # remove the entry at the old position + old_entry = d.entries[old_index] + key = old_entry.key + value = old_entry.value + _ll_dict_del_entry(d, old_index) + + # note a corner case: it is possible that 'replace_with' is just too + # large to fit in the type T used so far for the index. But in that + # case, the list 'd.entries' is full, and the following call to + # _ll_dict_setitem_lookup_done() will necessarily reindex the dict. + # So in that case, this value of 'replace_with' should be ignored. + ll_call_delete_by_entry_index(d, hash, old_index, + replace_with = VALID_OFFSET + d.num_ever_used_items) + _ll_dict_setitem_lookup_done(d, key, value, hash, -1) + +def ll_dict_move_to_first(d, key): + # In this function, we might do a bit more than the strict minimum + # of walks over parts of the array, trying to keep the code at least + # semi-reasonable, while the goal is still amortized constant-time + # over many calls. + + # Call ll_dict_remove_deleted_items() first if there are too many + # deleted items. Not a perfect solution, because lookup_function() + # might do random things with the dict and create many new deleted + # items. Still, should be fine, because nothing crucially depends + # on this: the goal is to avoid the dictionary's list growing + # forever. + if d.num_live_items < len(d.entries) // 2 - 16: + ll_dict_remove_deleted_items(d) + + hash = d.keyhash(key) + old_index = d.lookup_function(d, key, hash, FLAG_LOOKUP) + if old_index <= 0: + if old_index < 0: + raise KeyError + else: + return + + # the goal of the following is to set 'idst' to the number of + # deleted entries at the beginning, ensuring 'idst > 0' + must_reindex = False + if d.entries.valid(0): + # the first entry is valid, so we need to make room before. + new_allocated = _overallocate_entries_len(d.num_ever_used_items) + idst = ((new_allocated - d.num_ever_used_items) * 3) // 4 + ll_assert(idst > 0, "overallocate did not do enough") + newitems = lltype.malloc(lltype.typeOf(d).TO.entries.TO, new_allocated) + rgc.ll_arraycopy(d.entries, newitems, 0, idst, d.num_ever_used_items) + d.entries = newitems + i = 0 + while i < idst: + d.entries.mark_deleted(i) + i += 1 + d.num_ever_used_items += idst + old_index += idst + must_reindex = True + idst -= 1 + else: + idst = d.lookup_function_no >> FUNC_SHIFT + # All entries in range(0, idst) are deleted. Check if more are + while not d.entries.valid(idst): + idst += 1 + if idst == old_index: + d.lookup_function_no = ((d.lookup_function_no & FUNC_MASK) | + (old_index << FUNC_SHIFT)) + return + idst -= 1 + d.lookup_function_no = ((d.lookup_function_no & FUNC_MASK) | + (idst << FUNC_SHIFT)) + + # remove the entry at the old position + ll_assert(d.entries.valid(old_index), + "ll_dict_move_to_first: lost old_index") + ENTRY = lltype.typeOf(d.entries).TO.OF + old_entry = d.entries[old_index] + key = old_entry.key + value = old_entry.value + if hasattr(ENTRY, 'f_hash'): + ll_assert(old_entry.f_hash == hash, + "ll_dict_move_to_first: bad hash") + _ll_dict_del_entry(d, old_index) + + # put the entry at its new position + ll_assert(not d.entries.valid(idst), + "ll_dict_move_to_first: overwriting idst") + new_entry = d.entries[idst] + new_entry.key = key + new_entry.value = value + if hasattr(ENTRY, 'f_hash'): + new_entry.f_hash = hash + if hasattr(ENTRY, 'f_valid'): + new_entry.f_valid = True + d.num_live_items += 1 + + # fix the index + if must_reindex: + ll_dict_reindex(d, _ll_len_of_d_indexes(d)) + else: + ll_call_delete_by_entry_index(d, hash, old_index, + replace_with = VALID_OFFSET + idst) diff --git a/rpython/rtyper/test/test_rdict.py b/rpython/rtyper/test/test_rdict.py --- a/rpython/rtyper/test/test_rdict.py +++ b/rpython/rtyper/test/test_rdict.py @@ -1188,6 +1188,12 @@ assert not self.ll_contains(self.l_dict, ll_key) self.removed_keys.append(key) + def move_to_end(self, key, last=True): + "For test_rordereddict" + + def move_to_first(self, key): + self.move_to_end(key, last=False) + def copydict(self): self.l_dict = self.ll_copy(self.l_dict) assert self.ll_len(self.l_dict) == len(self.reference) @@ -1250,6 +1256,15 @@ return builds(Action, just('delitem'), tuples(sampled_from(self.space.reference))) + def st_move_to_end(self): + return builds(Action, + just('move_to_end'), tuples(sampled_from(self.space.reference))) + + def st_move_to_first(self): + return builds(Action, + just('move_to_first'), + tuples(sampled_from(self.space.reference))) + def steps(self): if not self.space: return builds(Action, just('setup'), tuples(st_keys, st_values)) @@ -1258,7 +1273,8 @@ if self.space.reference: return ( self.st_setitem() | sampled_from(global_actions) | - self.st_updateitem() | self.st_delitem()) + self.st_updateitem() | self.st_delitem() | + self.st_move_to_end() | self.st_move_to_first()) else: return (self.st_setitem() | sampled_from(global_actions)) diff --git a/rpython/rtyper/test/test_rordereddict.py b/rpython/rtyper/test/test_rordereddict.py --- a/rpython/rtyper/test/test_rordereddict.py +++ b/rpython/rtyper/test/test_rordereddict.py @@ -1,7 +1,8 @@ import py +import random from collections import OrderedDict -from hypothesis import settings +from hypothesis import settings, given, strategies from hypothesis.stateful import run_state_machine_as_test from rpython.rtyper.lltypesystem import lltype, rffi @@ -145,14 +146,18 @@ ll_d = rordereddict.ll_newdict(DICT) rordereddict.ll_dict_setitem(ll_d, llstr("k"), 1) rordereddict.ll_dict_setitem(ll_d, llstr("j"), 2) - ITER = rordereddict.get_ll_dictiter(lltype.Ptr(DICT)) + assert [hlstr(entry.key) for entry in self._ll_iter(ll_d)] == ["k", "j"] + + def _ll_iter(self, ll_d): + ITER = rordereddict.get_ll_dictiter(lltype.typeOf(ll_d)) ll_iter = rordereddict.ll_dictiter(ITER, ll_d) ll_dictnext = rordereddict._ll_dictnext - num = ll_dictnext(ll_iter) - assert hlstr(ll_d.entries[num].key) == "k" - num = ll_dictnext(ll_iter) - assert hlstr(ll_d.entries[num].key) == "j" - py.test.raises(StopIteration, ll_dictnext, ll_iter) + while True: + try: + num = ll_dictnext(ll_iter) + except StopIteration: + break + yield ll_d.entries[num] def test_popitem(self): DICT = self._get_str_dict() @@ -337,6 +342,31 @@ num_nonfrees += (got > 0) assert d.resize_counter <= idx.getlength() * 2 - num_nonfrees * 3 + @given(strategies.lists(strategies.integers(min_value=1, max_value=5))) + def test_direct_move_to_end(self, lst): + DICT = self._get_int_dict() + ll_d = rordereddict.ll_newdict(DICT) + rordereddict.ll_dict_setitem(ll_d, 1, 11) + rordereddict.ll_dict_setitem(ll_d, 2, 22) + def content(): + return [(entry.key, entry.value) for entry in self._ll_iter(ll_d)] + for case in lst: + if case == 1: + rordereddict.ll_dict_move_to_end(ll_d, 1, True) + assert content() == [(2, 22), (1, 11)] + elif case == 2: + rordereddict.ll_dict_move_to_end(ll_d, 2, True) + assert content() == [(1, 11), (2, 22)] + elif case == 3: + py.test.raises(KeyError, rordereddict.ll_dict_move_to_end, + ll_d, 3, True) + elif case == 4: + rordereddict.ll_dict_move_to_end(ll_d, 2, False) + assert content() == [(2, 22), (1, 11)] + elif case == 5: + rordereddict.ll_dict_move_to_end(ll_d, 1, False) + assert content() == [(1, 11), (2, 22)] + class TestRDictDirectDummyKey(TestRDictDirect): class dummykeyobj: @@ -369,10 +399,29 @@ res = self.interpret(func, [5]) assert res == 6 + def test_move_to_end(self): + def func(): + d1 = OrderedDict() + d1['key1'] = 'value1' + d1['key2'] = 'value2' + for i in range(20): + objectmodel.move_to_end(d1, 'key1') + assert d1.keys() == ['key2', 'key1'] + objectmodel.move_to_end(d1, 'key2') + assert d1.keys() == ['key1', 'key2'] + for i in range(20): + objectmodel.move_to_end(d1, 'key2', last=False) + assert d1.keys() == ['key2', 'key1'] + objectmodel.move_to_end(d1, 'key1', last=False) + assert d1.keys() == ['key1', 'key2'] + func() + self.interpret(func, []) + class ODictSpace(MappingSpace): MappingRepr = rodct.OrderedDictRepr new_reference = OrderedDict + moved_around = False ll_getitem = staticmethod(rodct.ll_dict_getitem) ll_setitem = staticmethod(rodct.ll_dict_setitem) ll_delitem = staticmethod(rodct.ll_dict_delitem) @@ -417,9 +466,23 @@ def removeindex(self): # remove the index, as done during translation for prebuilt dicts # (but cannot be done if we already removed a key) - if not self.removed_keys: + if not self.removed_keys and not self.moved_around: rodct.ll_no_initial_index(self.l_dict) + def move_to_end(self, key, last=True): + ll_key = self.ll_key(key) + rodct.ll_dict_move_to_end(self.l_dict, ll_key, last) + value = self.reference.pop(key) + if last: + self.reference[key] = value + else: + items = self.reference.items() + self.reference.clear() + self.reference[key] = value + self.reference.update(items) + # prevent ll_no_initial_index() + self.moved_around = True + def fullcheck(self): # overridden to also check key order assert self.ll_len(self.l_dict) == len(self.reference) _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit