Author: Armin Rigo <[email protected]>
Branch: rpython-hash
Changeset: r89830:bf4314421560
Date: 2017-01-29 22:51 +0100
http://bitbucket.org/pypy/pypy/changeset/bf4314421560/
Log: fix a bug found by the test (and also fix and improve the test
itself)
diff --git a/rpython/rlib/rsiphash.py b/rpython/rlib/rsiphash.py
--- a/rpython/rlib/rsiphash.py
+++ b/rpython/rlib/rsiphash.py
@@ -74,7 +74,7 @@
rffi.cast(rffi.ULONG, -1))):
os.write(2,
"PYTHONHASHSEED must be \"random\" or an integer "
- "in range [0; 4294967295]")
+ "in range [0; 4294967295]\n")
os._exit(1)
if not seed:
# disable the randomized hash
@@ -149,7 +149,9 @@
# NOTE: a latin-1 unicode string must have the same hash as the
# corresponding byte string. If the unicode is all within
# 0-255, then we need to allocate a byte buffer and copy the
- # latin-1 encoding in it manually.
+ # latin-1 encoding in it manually. Note also that we give a
+ # different hash result than CPython on ucs4 platforms, for
+ # unicode strings where CPython uses 2 bytes per character.
for i in range(length):
if ord(ll_s.chars[i]) > 0xFF:
addr = rstr._get_raw_buf_unicode(rstr.UNICODE, ll_s, 0)
diff --git a/rpython/rlib/test/test_rsiphash.py
b/rpython/rlib/test/test_rsiphash.py
--- a/rpython/rlib/test/test_rsiphash.py
+++ b/rpython/rlib/test/test_rsiphash.py
@@ -74,38 +74,72 @@
def test_translated():
d1 = {"foo": 123}
- d2 = {u"foo": 456, u"\u1234": 789}
+ d2 = {u"foo": 456, u"\u1234\u5678": 789}
+ class G:
+ pass
+ g = G()
+ g.v1 = d1.copy()
+ g.v2 = d2.copy()
- def entrypoint():
+ def fetch(n):
+ if n == 0: return d1.get("foo", -1)
+ if n == 1: return g.v1.get("foo", -1)
+ if n == 2: return compute_hash("foo")
+ if n == 3: return d2.get(u"foo", -1)
+ if n == 4: return g.v2.get(u"foo", -1)
+ if n == 5: return compute_hash(u"foo")
+ if n == 6: return d2.get(u"\u1234\u5678", -1)
+ if n == 7: return g.v2.get(u"\u1234\u5678", -1)
+ if n == 8: return compute_hash(u"\u1234\u5678")
+ assert 0
+
+ def entrypoint(n):
enable_siphash24()
- return '%d %d %d %d %d %d' % (
- d1.get("foo", -1), compute_hash("bar"),
- d2.get(u"foo", -1), compute_hash(u"foo"),
- d2.get(u"\u1234", -1), compute_hash(u"\u1234"))
+ g.v1["bar"] = -2
+ g.v2[u"bar"] = -2
+ if n >= 0: # get items one by one, because otherwise it may
+ # be the case that one line influences the next
+ return str(fetch(n))
+ else:
+ # ...except in random mode, because we want all results
+ # to be computed with the same seed
+ return ' '.join([str(fetch(n)) for n in range(9)])
- fn = compile(entrypoint, [])
+ fn = compile(entrypoint, [int])
+
+ def getall():
+ return [int(fn(i)) for i in range(9)]
old_val = os.environ.get('PYTHONHASHSEED', None)
try:
os.environ['PYTHONHASHSEED'] = '0'
- s1 = fn()
- assert map(int, s1.split()) == [
- 123, intmask(15988776847138518036),
- 456, intmask(15988776847138518036),
- 789, intmask(16003099094427356855)]
+ s1 = getall()
+ assert s1[:8] == [
+ 123, 123, intmask(15988776847138518036),
+ 456, 456, intmask(15988776847138518036),
+ 789, 789]
+ assert s1[8] in [intmask(17593683438421985039), # ucs2 mode
+ intmask(94801584261658677)] # ucs4 mode
os.environ['PYTHONHASHSEED'] = '3987654321'
- s1 = fn()
- assert map(int, s1.split()) == [
- 123, intmask(5890804383681474441),
- 456, intmask(5890804383681474441),
- 789, intmask(10331001347733193222)]
+ s1 = getall()
+ assert s1[:8] == [
+ 123, 123, intmask(5890804383681474441),
+ 456, 456, intmask(5890804383681474441),
+ 789, 789]
+ assert s1[8] in [intmask(4192582507672183374), # ucs2 mode
+ intmask(7179255293164649778)] # ucs4 mode
for env in ['', 'random']:
os.environ['PYTHONHASHSEED'] = env
- s1 = fn()
- s2 = fn()
- assert s1 != s2
+ s1 = map(int, fn(-1).split())
+ s2 = map(int, fn(-1).split())
+ assert s1[0:2]+s1[3:5]+s1[6:8] == [123, 123, 456, 456, 789, 789]
+ assert s1[2] == s1[5]
+ assert s2[0:2]+s2[3:5]+s2[6:8] == [123, 123, 456, 456, 789, 789]
+ assert s2[2] == s2[5]
+ #
+ assert len(set([s1[2], s2[2], s1[8], s2[8]])) == 4
finally:
if old_val is None:
diff --git a/rpython/rtyper/lltypesystem/rordereddict.py
b/rpython/rtyper/lltypesystem/rordereddict.py
--- a/rpython/rtyper/lltypesystem/rordereddict.py
+++ b/rpython/rtyper/lltypesystem/rordereddict.py
@@ -888,13 +888,18 @@
assert d.num_live_items == d.num_ever_used_items
assert not d.indexes
#
- # recompute all hashes, if they are stored in d.entries
+ # recompute all hashes. Needed if they are stored in d.entries,
+ # but do it anyway: otherwise, e.g. a string-keyed dictionary
+ # won't have a fasthash on its strings if their hash is still
+ # uncomputed.
ENTRY = lltype.typeOf(d.entries).TO.OF
- if hasattr(ENTRY, 'f_hash'):
- for i in range(d.num_ever_used_items):
- assert d.entries.valid(i)
- d_entry = d.entries[i]
- d_entry.f_hash = d.keyhash(d_entry.key)
+ for i in range(d.num_ever_used_items):
+ assert d.entries.valid(i)
+ d_entry = d.entries[i]
+ h = d.keyhash(d_entry.key)
+ if hasattr(ENTRY, 'f_hash'):
+ d_entry.f_hash = h
+ # else: keyhash() is called purely for its side effect of computing and caching the hash on d_entry.key
#
# Use the smallest acceptable size for ll_dict_reindex
new_size = DICT_INITSIZE
diff --git a/rpython/rtyper/lltypesystem/rstr.py
b/rpython/rtyper/lltypesystem/rstr.py
--- a/rpython/rtyper/lltypesystem/rstr.py
+++ b/rpython/rtyper/lltypesystem/rstr.py
@@ -3,7 +3,7 @@
from rpython.annotator import model as annmodel
from rpython.rlib import jit, types, objectmodel
from rpython.rlib.objectmodel import (malloc_zero_filled, we_are_translated,
- ll_hash_string, keepalive_until_here, specialize, enforceargs)
+ ll_hash_string, keepalive_until_here, specialize, enforceargs, dont_inline)
from rpython.rlib.signature import signature
from rpython.rlib.rarithmetic import ovfcheck
from rpython.rtyper.error import TyperError
@@ -383,6 +383,7 @@
return 0
@staticmethod
+ @dont_inline
def _ll_strhash(s):
# unlike CPython, there is no reason to avoid to return -1
# but our malloc initializes the memory to zero, so we use zero as the
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit