Author: Maciej Fijalkowski <[email protected]>
Branch: rdict-experiments
Changeset: r59658:c463f8834835
Date: 2013-01-03 10:50 +0200
http://bitbucket.org/pypy/pypy/changeset/c463f8834835/

Log:    see how good is rhettingers dict in rpython

diff --git a/pypy/rlib/dict.py b/pypy/rlib/dict.py
new file mode 100644
--- /dev/null
+++ b/pypy/rlib/dict.py
@@ -0,0 +1,198 @@
+
+from pypy.rlib.objectmodel import compute_identity_hash
+
+# Placeholder constants
+
+FREE = -1
+DUMMY = -2
+
+class Dict(object):
+    'Space efficient dictionary with fast iteration and cheap resizes.'
+
+    def _lookup(self, key, hashvalue):
+        'Same lookup logic as currently used in real dicts'
+        assert self.filled < len(self.indices)   # At least one open slot
+        freeslot = -1
+        PERTURB_SHIFT = 5
+        if hashvalue < 0:
+            perturb = -hashvalue
+        else:
+            perturb = hashvalue
+        n = len(self.indices)
+        i = perturb & (n-1)
+        while True:
+            index = self.indices[i]
+            if index == FREE:
+                return (FREE, i) if freeslot == -1 else (DUMMY, freeslot)
+            elif index == DUMMY:
+                freeslot = i
+            elif (self.keylist[index] is key or
+                  self.hashlist[index] == hashvalue
+                  and self.keylist[index] == key):
+                    return (index, i)
+            i = 5 * i + perturb + 1
+            i = i & (n-1)
+            perturb >>= PERTURB_SHIFT
+
+    @staticmethod
+    def _make_index(n):
+        'New sequence of indices using the smallest possible datatype'
+        #if n <= 2**7: return array.array('b', [FREE]) * n       # signed char
+        #if n <= 2**15: return array.array('h', [FREE]) * n      # signed short
+        #if n <= 2**31: return array.array('l', [FREE]) * n      # signed long
+        return [FREE] * n                                       # python 
integers
+
+    def _resize(self, n):
+        '''Reindex the existing hash/key/value entries.
+           Entries do not get moved, they only get new indices.
+           No calls are made to hash() or __eq__().
+
+        '''
+        new_size = 8
+        while new_size < n:
+            new_size <<= 1
+        n = new_size
+        self.indices = self._make_index(n)
+        PERTURB_SHIFT = 5
+        for index, hashvalue in enumerate(self.hashlist):
+            if hashvalue < 0:
+                perturb = -hashvalue
+            else:
+                perturb = hashvalue
+            i = hashvalue & (n-1)
+            while True:
+                if self.indices[i] == FREE:
+                    break
+                i = 5 * i + perturb + 1
+                i = i & (n-1)
+                perturb >>= PERTURB_SHIFT
+            self.indices[i] = index
+        self.filled = self.used
+
+    def clear(self):
+        self.indices = self._make_index(8)
+        self.hashlist = []
+        self.keylist = []
+        self.valuelist = []
+        self.used = 0
+        self.filled = 0                                         # used + 
dummies
+
+    def __init__(self):
+        self.clear()
+
+    def __getitem__(self, key):
+        hashvalue = hash(key)
+        index, i = self._lookup(key, hashvalue)
+        if index < 0:
+            raise KeyError(key)
+        return self.valuelist[index]
+
+    def __setitem__(self, key, value):
+        hashvalue = key # hash
+        index, i = self._lookup(key, hashvalue)
+        if index < 0:
+            self.indices[i] = self.used
+            self.hashlist.append(hashvalue)
+            self.keylist.append(key)
+            self.valuelist.append(value)
+            self.used += 1
+            if index == FREE:
+                self.filled += 1
+                if self.filled * 3 > len(self.indices) * 2:
+                    self._resize(4 * self.__len__())
+        else:
+            self.valuelist[index] = value
+
+    def __delitem__(self, key):
+        hashvalue = hash(key)
+        index, i = self._lookup(key, hashvalue)
+        if index < 0:
+            raise KeyError(key)
+        self.indices[i] = DUMMY
+        self.used -= 1
+        # If needed, swap with the lastmost entry to avoid leaving a "hole"
+        if index != self.used:
+            lasthash = self.hashlist[-1]
+            lastkey = self.keylist[-1]
+            lastvalue = self.valuelist[-1]
+            lastindex, j = self._lookup(lastkey, lasthash)
+            assert lastindex >= 0 and i != j
+            self.indices[j] = index
+            self.hashlist[index] = lasthash
+            self.keylist[index] = lastkey
+            self.valuelist[index] = lastvalue
+        # Remove the lastmost entry
+        self.hashlist.pop()
+        self.keylist.pop()
+        self.valuelist.pop()
+
+    def __len__(self):
+        return self.used
+
+    def __iter__(self):
+        return iter(self.keylist)
+
+    def iterkeys(self):
+        return iter(self.keylist)
+
+    def keys(self):
+        return list(self.keylist)
+
+    def itervalues(self):
+        return iter(self.valuelist)
+
+    def values(self):
+        return list(self.valuelist)
+
+    def iteritems(self):
+        return itertools.izip(self.keylist, self.valuelist)
+
+    def items(self):
+        return zip(self.keylist, self.valuelist)
+
+    def __contains__(self, key):
+        index, i = self._lookup(key, hash(key))
+        return index >= 0
+
+    def get(self, key, default=None):
+        'D.get(k[,d]) -> D[k] if k in D, else d.  d defaults to None.'
+        index, i = self._lookup(key, hash(key))
+        return self.valuelist[index] if index >= 0 else default
+
+    def popitem(self):
+        ''' D.popitem() -> (k, v), remove and return some (key, value) pair as 
a
+            2-tuple; but raise KeyError if D is empty.
+        '''
+        try:
+            key = self.keylist[-1]
+            value = self.valuelist[-1]
+        except IndexError:
+            raise KeyError( 'popitem(): dictionary is empty')
+        del self[key]
+        return key, value
+
+    def __repr__(self):
+        return 'Dict(%r)' % self.items()
+
+    def show_structure(self):
+        'Diagnostic method.  Not part of the API.'
+        print '=' * 50
+        print self
+        print 'Indices:', self.indices
+        for i, row in enumerate(zip(self.hashlist, self.keylist, 
self.valuelist)):
+            print i, row
+        print '-' * 50
+
+
+if __name__ == '__main__':
+    import sys
+    def f():
+        if len(sys.argv) > 1:
+            d = {}
+        else:
+            d = Dict()
+        class A(object):
+            pass
+        for i in range(10000000):
+            d[i] = A()
+    f()
diff --git a/pypy/translator/goal/targetnopstandalone.py 
b/pypy/translator/goal/targetnopstandalone.py
--- a/pypy/translator/goal/targetnopstandalone.py
+++ b/pypy/translator/goal/targetnopstandalone.py
@@ -7,13 +7,24 @@
 actually implementing argv of the executable.
 """
 
-def debug(msg):
-    print "debug:", msg
-
 # __________  Entry point  __________
 
+class A(object):
+    def __init__(self, a, b, c):
+        self.a = a
+        self.b = b
+        self.c = c
+
 def entry_point(argv):
-    debug("hello world")
+    if len(argv) > 2:
+        d = {}
+        for i in range(int(argv[1])):
+            d[i] = A(i, i, i)
+    else:
+        from pypy.rlib import dict
+        d = dict.Dict()
+        for i in range(int(argv[1])):
+            d.__setitem__(i, A(i, i, i))
     return 0
 
 # _____ Define and setup target ___
_______________________________________________
pypy-commit mailing list
[email protected]
http://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to