Author: Carl Friedrich Bolz <[email protected]>
Branch: bitset-intsets
Changeset: r95940:bfd90a10b38c
Date: 2016-02-27 15:39 +0100
http://bitbucket.org/pypy/pypy/changeset/bfd90a10b38c/

Log:    in-progress: start implementing integer sets as bitsets

diff --git a/pypy/objspace/std/setobject.py b/pypy/objspace/std/setobject.py
--- a/pypy/objspace/std/setobject.py
+++ b/pypy/objspace/std/setobject.py
@@ -13,6 +13,14 @@
 from rpython.rlib.rarithmetic import intmask, r_uint
 from rpython.rlib import rerased, jit
 
+def popcount(i):
+    # XXX
+    i = r_uint(i)
+    res = 0
+    while i:
+        res += i & 1
+        i >>= 1
+    return res
 
 UNROLL_CUTOFF = 5
 
@@ -687,7 +695,7 @@
     # __________________ methods called on W_SetObject _________________
 
     def clear(self, w_set):
-        raise NotImplementedError
+        w_set.switch_to_empty_strategy()
 
     def copy_real(self, w_set):
         raise NotImplementedError
@@ -878,9 +886,6 @@
     def length(self, w_set):
         return len(self.unerase(w_set.sstorage))
 
-    def clear(self, w_set):
-        w_set.switch_to_empty_strategy()
-
     def copy_real(self, w_set):
         # may be used internally on frozen sets, although frozenset().copy()
         # returns self in frozenset_copy__Frozenset.
@@ -1281,7 +1286,7 @@
         return UnicodeIteratorImplementation(self.space, self, w_set)
 
 
-class IntegerSetStrategy(AbstractUnwrappedSetStrategy, SetStrategy):
+class IntegerSetStrategy(SetStrategy):
     erase, unerase = rerased.new_erasing_pair("integer")
     erase = staticmethod(erase)
     unerase = staticmethod(unerase)
@@ -1289,6 +1294,9 @@
     intersect_jmp = jit.JitDriver(greens = [], reds = 'auto',
                                   name='set(int).intersect')
 
+    BITMASK = 3
+    KEYMASK = ~BITMASK
+
     def get_empty_storage(self):
         return self.erase({})
 
@@ -1296,7 +1304,16 @@
         return {}
 
     def listview_int(self, w_set):
-        return self.unerase(w_set.sstorage).keys()
+        result = []
+        items = self.unerase(w_set.sstorage).iteritems()
+        for key, value in items:
+            i = 0
+            while value:
+                if value & 1:
+                    result.append(key | i)
+                value >>= 1
+                i += 1
+        return result
 
     def is_correct_type(self, w_key):
         return type(w_key) is W_IntObject
@@ -1321,6 +1338,231 @@
     def iter(self, w_set):
         return IntegerIteratorImplementation(self.space, self, w_set)
 
+    def _set_key(self, setdata, i):
+        setdata[i & self.KEYMASK] = setdata.get(i & self.KEYMASK, 0) | (1 << 
(i & self.BITMASK))
+
+    def _set_key_wrapped(self, setdata, w_key):
+        self._set_key(setdata, self.unwrap(w_key))
+
+    def _remove_key(self, setdata, i):
+        key = i & self.KEYMASK
+        item = setdata.get(key, 0)
+        bit = (1 << (i & self.BITMASK))
+        is_there = item & bit
+        if is_there:
+            if item & (~bit) == 0:
+                del setdata[key]
+            else:
+                setdata[key] = item & (~bit)
+        return bool(is_there)
+
+    def _has_key(self, setdata, i):
+        return bool(setdata.get(i & self.KEYMASK, 0) & (1 << (i & 
self.BITMASK)))
+
+    def _has_key_wrapped(self, setdata, w_key):
+        return self._has_key(setdata, self.unwrap(w_key))
+
+    @jit.look_inside_iff(lambda self, list_w:
+            jit.loop_unrolling_heuristic(list_w, len(list_w), UNROLL_CUTOFF))
+    def get_storage_from_list(self, list_w):
+        setdata = {}
+        for w_item in list_w:
+            self._set_key_wrapped(setdata, w_item)
+        return self.erase(setdata)
+
+    @jit.look_inside_iff(lambda self, items:
+            jit.loop_unrolling_heuristic(items, len(items), UNROLL_CUTOFF))
+    def get_storage_from_unwrapped_list(self, items):
+        setdata = self.get_empty_dict()
+        for item in items:
+            self._set_key(setdata, item)
+        return self.erase(setdata)
+
+    def get_storage_copy(self, w_set):
+        d = self.unerase(w_set.sstorage)
+        copy = self.erase(d.copy())
+        return copy
+
+    def copy_real(self, w_set):
+        # may be used internally on frozen sets, although frozenset().copy()
+        # returns self in frozenset_copy__Frozenset.
+        strategy = w_set.strategy
+        d = self.unerase(w_set.sstorage)
+        storage = self.erase(d.copy())
+        clone = w_set.from_storage_and_strategy(storage, strategy)
+        return clone
+
+    def add(self, w_set, w_key):
+        if self.is_correct_type(w_key):
+            d = self.unerase(w_set.sstorage)
+            self._set_key_wrapped(d, w_key)
+        else:
+            w_set.switch_to_object_strategy(self.space)
+            w_set.add(w_key)
+
+    def remove(self, w_set, w_item):
+        d = self.unerase(w_set.sstorage)
+        if not self.is_correct_type(w_item):
+            #XXX check type of w_item and immediately return False in some 
cases
+            w_set.switch_to_object_strategy(self.space)
+            return w_set.remove(w_item)
+
+        key = self.unwrap(w_item)
+        return self._remove_key(d, key)
+
+    def has_key(self, w_set, w_key):
+        if not self.is_correct_type(w_key):
+            #XXX check type of w_item and immediately return False in some 
cases
+            w_set.switch_to_object_strategy(self.space)
+            return w_set.has_key(w_key)
+        d = self.unerase(w_set.sstorage)
+        return self._has_key_wrapped(d, w_key)
+
+    def length(self, w_set):
+        l = 0
+        # XXX too slow!
+        for value in self.unerase(w_set.sstorage).itervalues():
+            assert value
+            l += popcount(value)
+        return l
+
+    def getdict_w(self, w_set):
+        result = newset(self.space)
+        items = self.unerase(w_set.sstorage).iteritems()
+        for key, value in items:
+            i = 0
+            while value:
+                if value & 1:
+                    result[self.wrap(key | i)] = None
+                value >>= 1
+                i += 1
+        return result
+
+    def update(self, w_set, w_other):
+        if self is w_other.strategy:
+            d_set = self.unerase(w_set.sstorage)
+            d_other = self.unerase(w_other.sstorage)
+            d_set.update(d_other)
+            return
+        if w_other.length() == 0:
+            return
+        w_set.switch_to_object_strategy(self.space)
+        w_set.update(w_other)
+
+    def XXX_symmetric_difference_base(self, w_set, w_other):
+        if self is w_other.strategy:
+            strategy = w_set.strategy
+            storage = self._symmetric_difference_unwrapped(w_set, w_other)
+        else:
+            w_set.switch_to_object_strategy(self.space)
+            return w_set.symmetric_difference(w_other)
+        return storage, strategy
+
+    def XXXsymmetric_difference(self, w_set, w_other):
+        if w_other.length() == 0:
+            return w_set.copy_real()
+        storage, strategy = self._symmetric_difference_base(w_set, w_other)
+        return w_set.from_storage_and_strategy(storage, strategy)
+
+    def symmetric_difference_update(self, w_set, w_other):
+        if w_other.length() == 0:
+            return
+        if self is w_other.strategy:
+            w_set.sstorage = self._symmetric_difference_unwrapped(w_set, 
w_other)
+        else:
+            w_set.switch_to_object_strategy(self.space)
+            w_set.symmetric_difference_update(w_other)
+
+    def _symmetric_difference_unwrapped(self, w_set, w_other):
+        d_new = self.get_empty_dict()
+        d_this = self.unerase(w_set.sstorage)
+        d_other = self.unerase(w_other.sstorage)
+        # XXX
+        for key, item in d_other.iteritems():
+            new = item ^ d_this.get(key, 0)
+            if new:
+                d_new[key] = new
+        for key, item in d_this.iteritems():
+            new = item ^ d_other.get(key, 0)
+            if new:
+                d_new[key] = new
+
+        storage = self.erase(d_new)
+        return storage
+
+    def _symmetric_difference_wrapped(self, w_set, w_other):
+        newsetdata = newset(self.space)
+        for obj in self.unerase(w_set.sstorage):
+            w_item = self.wrap(obj)
+            if not w_other.has_key(w_item):
+                newsetdata[w_item] = None
+
+        w_iterator = w_other.iter()
+        while True:
+            w_item = w_iterator.next_entry()
+            if w_item is None:
+                break
+            if not w_set.has_key(w_item):
+                newsetdata[w_item] = None
+
+        strategy = self.space.fromcache(ObjectSetStrategy)
+        return strategy.erase(newsetdata)
+
+    def _intersect_base(self, w_set, w_other):
+        if self is w_other.strategy:
+            strategy = self
+            if w_set.length() > w_other.length():
+                # swap operands
+                storage = self._intersect_unwrapped(w_other, w_set)
+            else:
+                storage = self._intersect_unwrapped(w_set, w_other)
+        elif not self.may_contain_equal_elements(w_other.strategy):
+            strategy = self.space.fromcache(EmptySetStrategy)
+            storage = strategy.get_empty_storage()
+        else:
+            strategy = self.space.fromcache(ObjectSetStrategy)
+            if w_set.length() > w_other.length():
+                # swap operands
+                storage = w_other.strategy._intersect_wrapped(w_other, w_set)
+            else:
+                storage = self._intersect_wrapped(w_set, w_other)
+        return storage, strategy
+
+    def XXX_intersect_wrapped(self, w_set, w_other):
+        result = newset(self.space)
+        for key in self.unerase(w_set.sstorage):
+            self.intersect_jmp.jit_merge_point()
+            w_key = self.wrap(key)
+            if w_other.has_key(w_key):
+                result[w_key] = None
+
+        strategy = self.space.fromcache(ObjectSetStrategy)
+        return strategy.erase(result)
+
+    def _intersect_unwrapped(self, w_set, w_other):
+        result = self.get_empty_dict()
+        d_this = self.unerase(w_set.sstorage)
+        d_other = self.unerase(w_other.sstorage)
+        for key, value in d_this.iteritems():
+            value = value & d_other.get(key, 0)
+            if value:
+                result[key] = value
+        return self.erase(result)
+
+    def intersect(self, w_set, w_other):
+        storage, strategy = self._intersect_base(w_set, w_other)
+        return w_set.from_storage_and_strategy(storage, strategy)
+
+    def intersect_update(self, w_set, w_other):
+        if w_set.length() > w_other.length():
+            w_intersection = w_other.intersect(w_set)
+            strategy = w_intersection.strategy
+            storage = w_intersection.sstorage
+        else:
+            storage, strategy = self._intersect_base(w_set, w_other)
+        w_set.strategy = strategy
+        w_set.sstorage = storage
+
 
 class ObjectSetStrategy(AbstractUnwrappedSetStrategy, SetStrategy):
     erase, unerase = rerased.new_erasing_pair("object")
@@ -1489,14 +1731,36 @@
     def __init__(self, space, strategy, w_set):
         IteratorImplementation.__init__(self, space, strategy, w_set)
         d = strategy.unerase(w_set.sstorage)
-        self.iterator = d.iterkeys()
+        self.iterator = d.iteritems()
+        self.key_from_dict = 0
+        self.value_from_dict = 0
+        self.intvalue = 0
 
     def next_entry(self):
-        # note that this 'for' loop only runs once, at most
-        for key in self.iterator:
-            return self.space.wrap(key)
-        else:
-            return None
+        value_from_dict = self.value_from_dict
+        key_from_dict = self.key_from_dict
+        intvalue = self.intvalue
+        if not value_from_dict:
+            # note that this 'for' loop only runs once, at most
+            for item in self.iterator:
+                key_from_dict, value_from_dict = item
+                self.key_from_dict = key_from_dict
+                assert value_from_dict
+                intvalue = 0
+                break
+            else:
+                return None
+        while True:
+            result = intvalue
+            should_return = value_from_dict & 1
+            value_from_dict >>= 1
+            intvalue += 1
+            if should_return:
+                self.value_from_dict = value_from_dict
+                self.intvalue = intvalue
+                return self.space.wrap(key_from_dict | result)
+
+
 
 class IdentityIteratorImplementation(IteratorImplementation):
     def __init__(self, space, strategy, w_set):
diff --git a/pypy/objspace/std/test/test_setstrategies.py 
b/pypy/objspace/std/test/test_setstrategies.py
--- a/pypy/objspace/std/test/test_setstrategies.py
+++ b/pypy/objspace/std/test/test_setstrategies.py
@@ -1,3 +1,4 @@
+import sys
 from pypy.objspace.std.setobject import W_SetObject
 from pypy.objspace.std.setobject import (
     BytesIteratorImplementation, BytesSetStrategy, EmptySetStrategy,
@@ -5,6 +6,12 @@
     UnicodeIteratorImplementation, UnicodeSetStrategy)
 from pypy.objspace.std.listobject import W_ListObject
 
+
+from hypothesis import strategies, given
+
+ints = strategies.integers(-sys.maxint-1, sys.maxint)
+intlists = strategies.lists(ints)
+
 class TestW_SetStrategies:
 
     def wrapped(self, l):
@@ -144,3 +151,85 @@
         #
         s = W_SetObject(space, self.wrapped([u"a", u"b"]))
         assert sorted(space.listview_unicode(s)) == [u"a", u"b"]
+
+
+class TestSetHypothesis:
+    def wrapped(self, l):
+        return W_ListObject(self.space, [self.space.wrap(x) for x in l])
+
+    def wrap(self, x):
+        return self.space.wrap(x)
+
+    def intset(self, content):
+        return W_SetObject(self.space, self.wrapped(content))
+
+    @given(intlists, ints)
+    def test_intset_added_element_in_set(self, content, i):
+        s = self.intset(content)
+        w_i = self.wrap(i)
+        s.add(w_i)
+        assert s.has_key(w_i)
+
+    @given(intlists, ints)
+    def test_remove(self, content, i):
+        s = self.intset(content + [i])
+        w_i = self.wrap(i)
+        s.remove(w_i)
+        assert not s.has_key(w_i)
+
+    @given(intlists, ints)
+    def test_length(self, content, i):
+        s = self.intset(content)
+        assert len(set(content)) == s.length()
+
+    @given(intlists, intlists)
+    def test_update(self, c1, c2):
+        s1 = self.intset(c1)
+        s2 = self.intset(c2)
+        s1.update(s2)
+        for i in c1:
+            assert s1.has_key(self.wrap(i))
+        for i in c2:
+            assert s1.has_key(self.wrap(i))
+        # XXX check that no additional keys
+
+    @given(intlists, intlists)
+    def test_symmetric_update(self, c1, c2):
+        s1 = self.intset(c1)
+        s2 = self.intset(c2)
+        s1.symmetric_difference_update(s2)
+        s1.length()
+        for i in c1:
+            if i not in c2:
+                assert s1.has_key(self.wrap(i))
+            else:
+                assert not s1.has_key(self.wrap(i))
+        for i in c2:
+            if i not in c1:
+                assert s1.has_key(self.wrap(i))
+            else:
+                assert not s1.has_key(self.wrap(i))
+        # XXX check that no additional keys
+
+    @given(intlists, intlists)
+    def XXXtest_update_vs_not(self, c1, c2):
+        return #XXX write me!
+
+    @given(intlists, intlists)
+    def test_intersect(self, c1, c2):
+        s1 = self.intset(c1)
+        s2 = self.intset(c2)
+        s = s1.intersect(s2)
+        for i in c1:
+            if i in c2:
+                assert s.has_key(self.wrap(i))
+            else:
+                assert not s.has_key(self.wrap(i))
+        for i in c2:
+            if i in c1:
+                assert s.has_key(self.wrap(i))
+            else:
+                assert not s.has_key(self.wrap(i))
+        # XXX check that no additional keys
+
+
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to