Author: Armin Rigo <[email protected]>
Branch: rpython-hash
Changeset: r89821:a36ffe6d56ba
Date: 2017-01-28 16:55 +0100
http://bitbucket.org/pypy/pypy/changeset/a36ffe6d56ba/
Log: Make rsiphash initialize itself at runtime, either with a random
seed or with the value of PYTHONHASHSEED, like CPython (the name
PYTHONHASHSEED can be changed by the RPython interpreter if needed).
diff --git a/rpython/rlib/rsiphash.py b/rpython/rlib/rsiphash.py
--- a/rpython/rlib/rsiphash.py
+++ b/rpython/rlib/rsiphash.py
@@ -1,9 +1,10 @@
-import sys, os, struct
+import sys, os
from contextlib import contextmanager
-from rpython.rlib import rarithmetic
+from rpython.rlib import rarithmetic, rurandom
from rpython.rlib.objectmodel import not_rpython, always_inline
+from rpython.rlib.objectmodel import we_are_translated, dont_inline
from rpython.rlib.rgc import no_collect
-from rpython.rlib.rarithmetic import r_uint64
+from rpython.rlib.rarithmetic import r_uint64, r_uint32, r_uint
from rpython.rlib.rawstorage import misaligned_is_fine
from rpython.rtyper.lltypesystem import lltype, llmemory, rffi
from rpython.rtyper.lltypesystem.lloperation import llop
@@ -16,37 +17,82 @@
_le64toh = rarithmetic.byteswap
-# Initialize the values of the secret seed: two 64-bit constants.
-# CPython picks a new seed every time 'python' starts. PyPy cannot do
-# that as easily because many details may rely on getting the same hash
-# value before and after translation. We can, however, pick a random
-# seed once per translation, which should already be quite good.
-#
-# XXX no, it is not: e.g. all Ubuntu installations of the same Ubuntu
-# would get the same seed. That's not good enough.
+class Seed:
+    """Holder for the SipHash key and its initialization state."""
+    # k0l, k1l: the two 64-bit halves of the key; both start out as zero
+    # and are overwritten by select_random_seed() or choosen_seed().
+    k0l = r_uint64(0)
+    k1l = r_uint64(0)
+    # Set to True by initialize_from_env() once a key has been installed.
+    initialized = False
+
+# The single module-level instance that all the functions below share.
+seed = Seed()
-@not_rpython
-def select_random_seed():
- global k0, k1 # note: the globals k0, k1 are already byte-swapped
- v0, v1 = struct.unpack("QQ", os.urandom(16))
- k0 = r_uint64(v0)
- k1 = r_uint64(v1)
-select_random_seed()
+def _decode_le64(s, start):
+    """Assemble the 8 bytes s[start:start+8] into one r_uint64,
+    least-significant byte first."""
+    result = r_uint64(0)
+    for i in range(8):
+        # Widen each byte to r_uint64 *before* shifting, so that the
+        # shift happens in unsigned 64-bit arithmetic instead of on a
+        # plain integer.
+        result |= r_uint64(ord(s[start + i])) << (8 * i)
+    return result
+
+def select_random_seed(s):
+    """Install the string 's' (of length 16) as the SipHash key.
+
+    Bytes 0-7 become seed.k0l and bytes 8-15 become seed.k1l.  Both are
+    stored as r_uint64, like the class-level defaults on Seed and the
+    values written by choosen_seed().
+    """
+    seed.k0l = _decode_le64(s, 0)
+    seed.k1l = _decode_le64(s, 8)
+
+
+random_ctx = rurandom.init_urandom()
+
+def lcg_urandom(value):
+    """Turn the string 'value' into a 16-character seed string.
+
+    If 'value' round-trips through r_uint32 (str(x) == value), the
+    result is 16 characters produced by a linear congruential
+    generator started from that number.  Otherwise 'value' itself is
+    used, padded with '\\x00' or truncated to exactly 16 characters.
+    The value '0' is first replaced by '', so it takes the padding
+    path.
+    """
+    # It is unclear what the LCG really buys us: a seed of the form
+    # '%s\x00\x00\x00..' should be just as hard to guess.  The LCG is
+    # nevertheless copied from CPython for values that are 32-bit
+    # unsigned numbers; anything else uses the '%s\x00\x00\x00..' form.
+    if value == '0':
+        value = ''
+    try:
+        state = r_uint(r_uint32(value))
+    except (ValueError, OverflowError):
+        state = r_uint(0)
+    if str(state) != value:
+        # Not a canonical 32-bit unsigned number: pad or cut to 16 chars.
+        missing = 16 - len(value)
+        if missing > 0:
+            return value + '\x00' * missing
+        return value[:16]
+    chars = []
+    for _ in range(16):
+        # One LCG step, truncated to 32 bits; emit bits 16-23 of the state.
+        state = r_uint(r_uint32(state * 214013 + 2531011))
+        chars.append(chr((state >> 16) & 0xff))
+    return ''.join(chars)
+
+env_var_name = "PYTHONHASHSEED"
+
+@dont_inline
+def initialize_from_env():
+    """Pick the SipHash key from the environment and mark it as set.
+
+    If the variable named by 'env_var_name' holds a non-empty value
+    other than "random", the key is derived from it with lcg_urandom().
+    Otherwise (unset, empty, or "random") 16 bytes are obtained from
+    rurandom.urandom().  Either way the result is installed with
+    select_random_seed() and seed.initialized becomes True.
+    """
+    # This uses the same algorithms as CPython 3.5.  The environment
+    # variable we read also defaults to "PYTHONHASHSEED".  If needed,
+    # a different RPython interpreter can patch the value of the
+    # global variable 'env_var_name', or completely patch this function
+    # with a different one.
+    value = os.environ.get(env_var_name)
+    # os.environ.get() returns None when the variable is not set, so
+    # test for truthiness instead of calling len() on the result.
+    if value and value != "random":
+        s = lcg_urandom(value)
+    else:
+        s = rurandom.urandom(random_ctx, 16)
+    select_random_seed(s)
+    seed.initialized = True
+
@contextmanager
def choosen_seed(new_k0, new_k1, test_misaligned_path=False):
- global k0, k1, misaligned_is_fine
- old = k0, k1, misaligned_is_fine
- k0 = _le64toh(r_uint64(new_k0))
- k1 = _le64toh(r_uint64(new_k1))
+ """For tests."""
+ global misaligned_is_fine
+ old = seed.k0l, seed.k1l, misaligned_is_fine
+ seed.k0l = _le64toh(r_uint64(new_k0))
+ seed.k1l = _le64toh(r_uint64(new_k1))
if test_misaligned_path:
misaligned_is_fine = False
yield
- k0, k1, misaligned_is_fine = old
+ seed.k0l, seed.k1l, misaligned_is_fine = old
def get_current_seed():
- return _le64toh(k0), _le64toh(k1)
+ return _le64toh(seed.k0l), _le64toh(seed.k1l)
magic0 = r_uint64(0x736f6d6570736575)
@@ -82,15 +128,18 @@
"""Takes an address pointer and a size. Returns the hash as a r_uint64,
which can then be casted to the expected type."""
- direct = (misaligned_is_fine or
- (rffi.cast(lltype.Signed, addr_in) & 7) == 0)
-
+ if we_are_translated() and not seed.initialized:
+ initialize_from_env()
+ k0 = seed.k0l
+ k1 = seed.k1l
b = r_uint64(size) << 56
v0 = k0 ^ magic0
v1 = k1 ^ magic1
v2 = k0 ^ magic2
v3 = k1 ^ magic3
+ direct = (misaligned_is_fine or
+ (rffi.cast(lltype.Signed, addr_in) & 7) == 0)
index = 0
if direct:
while size >= 8:
@@ -113,7 +162,6 @@
r_uint64(llop.raw_load(rffi.UCHAR, addr_in, index + 6)) << 48 |
r_uint64(llop.raw_load(rffi.UCHAR, addr_in, index + 7)) << 56
)
- mi = _le64toh(mi)
size -= 8
index += 8
v3 ^= mi
diff --git a/rpython/rlib/rurandom.py b/rpython/rlib/rurandom.py
--- a/rpython/rlib/rurandom.py
+++ b/rpython/rlib/rurandom.py
@@ -57,6 +57,8 @@
immortal=True, zero=True)
def urandom(context, n, signal_checker=None):
+ # NOTE: no dictionaries here: rsiphash24 calls this to
+ # initialize the random seed of string hashes
provider = context[0]
if not provider:
# This handle is never explicitly released. The operating
@@ -139,6 +141,8 @@
def urandom(context, n, signal_checker=None):
"Read n bytes from /dev/urandom."
+ # NOTE: no dictionaries here: rsiphash24 calls this to
+ # initialize the random seed of string hashes
result = []
if SYS_getrandom is not None:
n = _getrandom(n, result, signal_checker)
diff --git a/rpython/rlib/test/test_rsiphash.py b/rpython/rlib/test/test_rsiphash.py
--- a/rpython/rlib/test/test_rsiphash.py
+++ b/rpython/rlib/test/test_rsiphash.py
@@ -1,4 +1,6 @@
+import os
from rpython.rlib.rsiphash import siphash24, choosen_seed
+from rpython.rlib.rsiphash import initialize_from_env, seed
from rpython.rtyper.lltypesystem import llmemory, rffi
@@ -42,3 +44,24 @@
def test_siphash24():
for expected, string in CASES:
assert check(string) == expected
+
+def test_fix_seed():
+    """Check initialize_from_env() with fixed seeds and with "random"."""
+    # Remember the caller's setting so the test does not leak a modified
+    # PYTHONHASHSEED into the rest of the test run.
+    saved = os.environ.get('PYTHONHASHSEED')
+    p = rffi.str2charp("foo")
+    try:
+        adr = llmemory.cast_ptr_to_adr(p)
+
+        os.environ['PYTHONHASHSEED'] = '0'
+        initialize_from_env()
+        assert siphash24(adr, 3) == 15988776847138518036 # checked with CPython 3.5
+
+        os.environ['PYTHONHASHSEED'] = '123'
+        initialize_from_env()
+        assert siphash24(adr, 3) == 12577370453467666022 # checked with CPython 3.5
+
+        # With "random", two initializations should pick different keys.
+        os.environ['PYTHONHASHSEED'] = 'random'
+        initialize_from_env()
+        hash1 = siphash24(adr, 3)
+        initialize_from_env()
+        hash2 = siphash24(adr, 3)
+        assert hash1 != hash2
+    finally:
+        # Release the buffer and restore the environment even when an
+        # assertion above fails.
+        rffi.free_charp(p)
+        if saved is None:
+            os.environ.pop('PYTHONHASHSEED', None)
+        else:
+            os.environ['PYTHONHASHSEED'] = saved
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit