Author: Armin Rigo <[email protected]>
Branch:
Changeset: r89768:01c2df465b1a
Date: 2017-01-25 17:29 +0100
http://bitbucket.org/pypy/pypy/changeset/01c2df465b1a/
Log: A siphash24 implementation based on CPython's, itself based on
https://github.com/majek/csiphash/
diff --git a/rpython/rlib/rsiphash.py b/rpython/rlib/rsiphash.py
new file mode 100644
--- /dev/null
+++ b/rpython/rlib/rsiphash.py
@@ -0,0 +1,156 @@
+import sys, os, struct
+from contextlib import contextmanager
+from rpython.rlib import rarithmetic
+from rpython.rlib.objectmodel import not_rpython, always_inline
+from rpython.rlib.rarithmetic import r_uint64
+from rpython.rlib.rawstorage import misaligned_is_fine
+from rpython.rtyper.lltypesystem import lltype, llmemory, rffi
+from rpython.rtyper.lltypesystem.lloperation import llop
+
+
+if sys.byteorder == 'little':
+ def _le64toh(x):
+ return x
+else:
+ _le64toh = rarithmetic.byteswap
+
+
+# Initialize the values of the secret seed: two 64-bit constants.
+# CPython picks a new seed every time 'python' starts. PyPy cannot do
+# that as easily because many details may rely on getting the same hash
+# value before and after translation. We can, however, pick a random
+# seed once per translation, which should already be quite good.
+
+@not_rpython
+def select_random_seed():
+ global k0, k1 # note: the globals k0, k1 are already byte-swapped
+ v0, v1 = struct.unpack("QQ", os.urandom(16))
+ k0 = r_uint64(v0)
+ k1 = r_uint64(v1)
+
+select_random_seed()
+
+@contextmanager
+def choosen_seed(new_k0, new_k1, test_misaligned_path=False):
+ global k0, k1, misaligned_is_fine
+ old = k0, k1, misaligned_is_fine
+ k0 = _le64toh(r_uint64(new_k0))
+ k1 = _le64toh(r_uint64(new_k1))
+ if test_misaligned_path:
+ misaligned_is_fine = False
+ yield
+ k0, k1, misaligned_is_fine = old
+
+def get_current_seed():
+ return _le64toh(k0), _le64toh(k1)
+
+
+magic0 = r_uint64(0x736f6d6570736575)
+magic1 = r_uint64(0x646f72616e646f6d)
+magic2 = r_uint64(0x6c7967656e657261)
+magic3 = r_uint64(0x7465646279746573)
+
+
+@always_inline
+def _rotate(x, b):
+ return (x << b) | (x >> (64 - b))
+
+@always_inline
+def _half_round(a, b, c, d, s, t):
+ a += b
+ c += d
+ b = _rotate(b, s) ^ a
+ d = _rotate(d, t) ^ c
+ a = _rotate(a, 32)
+ return a, b, c, d
+
+@always_inline
+def _double_round(v0, v1, v2, v3):
+ v0,v1,v2,v3 = _half_round(v0,v1,v2,v3,13,16)
+ v2,v1,v0,v3 = _half_round(v2,v1,v0,v3,17,21)
+ v0,v1,v2,v3 = _half_round(v0,v1,v2,v3,13,16)
+ v2,v1,v0,v3 = _half_round(v2,v1,v0,v3,17,21)
+ return v0, v1, v2, v3
+
+
+def siphash24(ptr, size):
+ """Takes a CCHARP pointer and a size. Returns the hash as a r_uint64,
+ which can then be casted to the expected type."""
+
+ addr_in = llmemory.cast_ptr_to_adr(ptr)
+ direct = (misaligned_is_fine or
+ (rffi.cast(lltype.Signed, addr_in) & 7) == 0)
+
+ b = r_uint64(size) << 56
+ v0 = k0 ^ magic0
+ v1 = k1 ^ magic1
+ v2 = k0 ^ magic2
+ v3 = k1 ^ magic3
+
+ index = 0
+ if direct:
+ while size >= 8:
+ mi = llop.raw_load(rffi.ULONGLONG, addr_in, index)
+ mi = _le64toh(mi)
+ size -= 8
+ index += 8
+ v3 ^= mi
+ v0, v1, v2, v3 = _double_round(v0, v1, v2, v3)
+ v0 ^= mi
+ else:
+ while size >= 8:
+ mi = (
+ r_uint64(llop.raw_load(rffi.UCHAR, addr_in, index)) |
+ r_uint64(llop.raw_load(rffi.UCHAR, addr_in, index + 1)) << 8 |
+ r_uint64(llop.raw_load(rffi.UCHAR, addr_in, index + 2)) << 16 |
+ r_uint64(llop.raw_load(rffi.UCHAR, addr_in, index + 3)) << 24 |
+ r_uint64(llop.raw_load(rffi.UCHAR, addr_in, index + 4)) << 32 |
+ r_uint64(llop.raw_load(rffi.UCHAR, addr_in, index + 5)) << 40 |
+ r_uint64(llop.raw_load(rffi.UCHAR, addr_in, index + 6)) << 48 |
+ r_uint64(llop.raw_load(rffi.UCHAR, addr_in, index + 7)) << 56
+ )
+ mi = _le64toh(mi)
+ size -= 8
+ index += 8
+ v3 ^= mi
+ v0, v1, v2, v3 = _double_round(v0, v1, v2, v3)
+ v0 ^= mi
+
+ t = r_uint64(0)
+ if size == 7:
+ t = r_uint64(llop.raw_load(rffi.UCHAR, addr_in, index + 6)) << 48
+ size = 6
+ if size == 6:
+ t |= r_uint64(llop.raw_load(rffi.UCHAR, addr_in, index + 5)) << 40
+ size = 5
+ if size == 5:
+ t |= r_uint64(llop.raw_load(rffi.UCHAR, addr_in, index + 4)) << 32
+ size = 4
+ if size == 4:
+ if direct:
+ t |= r_uint64(llop.raw_load(rffi.UINT, addr_in, index))
+ size = 0
+ else:
+ t |= r_uint64(llop.raw_load(rffi.UCHAR, addr_in, index + 3)) << 24
+ size = 3
+ if size == 3:
+ t |= r_uint64(llop.raw_load(rffi.UCHAR, addr_in, index + 2)) << 16
+ size = 2
+ if size == 2:
+ t |= r_uint64(llop.raw_load(rffi.UCHAR, addr_in, index + 1)) << 8
+ size = 1
+ if size == 1:
+ t |= r_uint64(llop.raw_load(rffi.UCHAR, addr_in, index))
+ size = 0
+ assert size == 0
+
+ b |= _le64toh(t)
+
+ v3 ^= b
+ v0, v1, v2, v3 = _double_round(v0, v1, v2, v3)
+ v0 ^= b
+ v2 ^= 0xff
+ v0, v1, v2, v3 = _double_round(v0, v1, v2, v3)
+ v0, v1, v2, v3 = _double_round(v0, v1, v2, v3)
+
+ return (v0 ^ v1) ^ (v2 ^ v3)
diff --git a/rpython/rlib/test/test_rsiphash.py
b/rpython/rlib/test/test_rsiphash.py
new file mode 100644
--- /dev/null
+++ b/rpython/rlib/test/test_rsiphash.py
@@ -0,0 +1,44 @@
+from rpython.rlib.rsiphash import siphash24, choosen_seed
+from rpython.rtyper.lltypesystem import rffi
+
+
+CASES = [
+ (2323638336262702335 , ""),
+ (5150479602681463644 , "h"),
+ (1013213613370725794 , "he"),
+ (7028032310911240238 , "hel"),
+ (9535960132410784494 , "hell"),
+ (3256502711089771242 , "hello"),
+ (2389188832234450176 , "hello "),
+ (13253855839845990393, "hello w"),
+ (7850036019043917323 , "hello wo"),
+ (14283308628425005953, "hello wor"),
+ (9605549962279590084 , "hello worl"),
+ (16371281469632894235, "hello world"),
+ (7298637955795769949 , "hello world\x9a"),
+ (13530878135053370821, "hello world\xf3\x80"),
+ (1643533543579802994 , "\xffhel\x82lo world\xbc"),
+ (14632093238728197380, "hexlylxox rewqw"),
+ (3434253029196696424 , "hexlylxox rewqws"),
+ (9855754545877066788 , "hexlylxox rewqwsv"),
+ (5233065012564472454 , "hexlylxox rewqwkashdw89"),
+ (16768585622569081808, "hexlylxox rewqwkeashdw89"),
+ (17430482483431293463, "HEEExlylxox rewqwkashdw89"),
+ (695783005783737705 , "hello woadwealidewd 3829ez 32ig dxwaebderld"),
+]
+
+def check(s):
+ p = rffi.str2charp(s)
+ q = rffi.str2charp('?' + s)
+ with choosen_seed(0x8a9f065a358479f4, 0x11cb1e9ee7f40e1f,
+ test_misaligned_path=True):
+ x = siphash24(p, len(s))
+ y = siphash24(rffi.ptradd(q, 1), len(s))
+ rffi.free_charp(p)
+ rffi.free_charp(q)
+ assert x == y
+ return x
+
+def test_siphash24():
+ for expected, string in CASES:
+ assert check(string) == expected
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit