Author: Carl Friedrich Bolz-Tereick <[email protected]>
Branch: unicode-utf8
Changeset: r95694:b6331207f8b9
Date: 2019-01-21 13:11 +0100
http://bitbucket.org/pypy/pypy/changeset/b6331207f8b9/
Log: Change UnicodeDictStrategy to store wrapped unicode objects as keys.
This lifts the restriction that the keys must be ASCII-only. The faster hash
and eq dispatching should still be a big win compared to going
through the space, despite the keys being wrapped.
diff --git a/pypy/objspace/std/dictmultiobject.py
b/pypy/objspace/std/dictmultiobject.py
--- a/pypy/objspace/std/dictmultiobject.py
+++ b/pypy/objspace/std/dictmultiobject.py
@@ -639,7 +639,7 @@
if type(w_key) is self.space.StringObjectCls:
self.switch_to_bytes_strategy(w_dict)
return
- elif type(w_key) is self.space.UnicodeObjectCls and w_key.is_ascii():
+ elif type(w_key) is self.space.UnicodeObjectCls:
self.switch_to_unicode_strategy(w_dict)
return
w_type = self.space.type(w_key)
@@ -1193,6 +1193,11 @@
create_iterator_classes(BytesDictStrategy)
+def unicode_eq(w_uni1, w_uni2):
+ return w_uni1.eq_w(w_uni2)
+
+def unicode_hash(w_uni):
+ return w_uni.hash_w()
class UnicodeDictStrategy(AbstractTypedStrategy, DictStrategy):
erase, unerase = rerased.new_erasing_pair("unicode")
@@ -1200,18 +1205,18 @@
unerase = staticmethod(unerase)
def wrap(self, unwrapped):
- return self.space.newutf8(unwrapped, len(unwrapped))
+ return unwrapped
def unwrap(self, wrapped):
- return self.space.utf8_w(wrapped)
+ return wrapped
def is_correct_type(self, w_obj):
space = self.space
- return type(w_obj) is space.UnicodeObjectCls and w_obj.is_ascii()
+ return type(w_obj) is space.UnicodeObjectCls
def get_empty_storage(self):
- res = {}
- mark_dict_non_null(res)
+ res = r_dict(unicode_eq, unicode_hash,
+ force_non_null=True)
return self.erase(res)
def _never_equal_to(self, w_lookup_type):
@@ -1235,14 +1240,14 @@
## assert key is not None
## return self.unerase(w_dict.dstorage).get(key, None)
- def listview_utf8(self, w_dict):
- return self.unerase(w_dict.dstorage).keys()
+ ## def listview_utf8(self, w_dict):
+ ## return self.unerase(w_dict.dstorage).keys()
## def w_keys(self, w_dict):
## return self.space.newlist_bytes(self.listview_bytes(w_dict))
def wrapkey(space, key):
- return space.newutf8(key, len(key))
+ return key
## @jit.look_inside_iff(lambda self, w_dict:
## w_dict_unrolling_heuristic(w_dict))
diff --git a/pypy/objspace/std/test/test_dictmultiobject.py
b/pypy/objspace/std/test/test_dictmultiobject.py
--- a/pypy/objspace/std/test/test_dictmultiobject.py
+++ b/pypy/objspace/std/test/test_dictmultiobject.py
@@ -1,3 +1,4 @@
+# -*- encoding: utf-8 -*-
import sys
import py
@@ -141,6 +142,7 @@
w_d.initialize_content([(wb("a"), w(1)), (wb("b"), w(2))])
assert self.space.listview_bytes(w_d) == ["a", "b"]
+ @py.test.mark.skip("possible re-enable later?")
def test_listview_unicode_dict(self):
w = self.space.wrap
w_d = self.space.newdict()
@@ -1151,8 +1153,11 @@
assert d.keys() == [u"a"]
assert type(d.keys()[0]) is unicode
+ d = {}
+ d[u"ä"] = 1
+ assert "UnicodeDictStrategy" in self.get_strategy(d)
+
def test_empty_to_int(self):
- import sys
d = {}
d[1] = "hi"
assert "IntDictStrategy" in self.get_strategy(d)
diff --git a/pypy/objspace/std/unicodeobject.py
b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -224,10 +224,19 @@
def descr_str(self, space):
return encode_object(space, self, 'ascii', 'strict')
- def descr_hash(self, space):
+ def hash_w(self):
+ # shortcut for UnicodeDictStrategy
x = compute_hash(self._utf8)
x -= (x == -1) # convert -1 to -2 without creating a bridge
- return space.newint(x)
+ return x
+
+ def descr_hash(self, space):
+ return space.newint(self.hash_w())
+
+ def eq_w(self, w_other):
+ # shortcut for UnicodeDictStrategy
+ assert isinstance(w_other, W_UnicodeObject)
+ return self._utf8 == w_other._utf8
def descr_eq(self, space, w_other):
try:
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit