Author: Armin Rigo <[email protected]>
Branch: 
Changeset: r89768:01c2df465b1a
Date: 2017-01-25 17:29 +0100
http://bitbucket.org/pypy/pypy/changeset/01c2df465b1a/

Log:    A siphash24 implementation based on CPython's, itself based on
        https://github.com/majek/csiphash/
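
For context, the new module is meant to be used roughly as follows.  This
is only a sketch distilled from the test file further down, not code from
the commit itself; it assumes an untranslated run inside a PyPy source
checkout:

    from rpython.rlib.rsiphash import siphash24, choosen_seed
    from rpython.rtyper.lltypesystem import rffi

    s = "hello world"
    p = rffi.str2charp(s)          # raw, NUL-terminated char buffer
    try:
        # Normally the process-wide seed picked at import/translation time
        # is used; choosen_seed() pins a known seed, as the tests below do.
        with choosen_seed(0x8a9f065a358479f4, 0x11cb1e9ee7f40e1f):
            h = siphash24(p, len(s))    # the hash, as an r_uint64
    finally:
        rffi.free_charp(p)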

diff --git a/rpython/rlib/rsiphash.py b/rpython/rlib/rsiphash.py
new file mode 100644
--- /dev/null
+++ b/rpython/rlib/rsiphash.py
@@ -0,0 +1,156 @@
+import sys, os, struct
+from contextlib import contextmanager
+from rpython.rlib import rarithmetic
+from rpython.rlib.objectmodel import not_rpython, always_inline
+from rpython.rlib.rarithmetic import r_uint64
+from rpython.rlib.rawstorage import misaligned_is_fine
+from rpython.rtyper.lltypesystem import lltype, llmemory, rffi
+from rpython.rtyper.lltypesystem.lloperation import llop
+
+
+if sys.byteorder == 'little':
+    def _le64toh(x):
+        return x
+else:
+    _le64toh = rarithmetic.byteswap
+
+
+# Initialize the values of the secret seed: two 64-bit constants.
+# CPython picks a new seed every time 'python' starts.  PyPy cannot do
+# that as easily because many details may rely on getting the same hash
+# value before and after translation.  We can, however, pick a random
+# seed once per translation, which should already be quite good.
+
+@not_rpython
+def select_random_seed():
+    global k0, k1    # note: the globals k0, k1 are already byte-swapped
+    v0, v1 = struct.unpack("QQ", os.urandom(16))
+    k0 = r_uint64(v0)
+    k1 = r_uint64(v1)
+
+select_random_seed()
+
+@contextmanager
+def choosen_seed(new_k0, new_k1, test_misaligned_path=False):
+    global k0, k1, misaligned_is_fine
+    old = k0, k1, misaligned_is_fine
+    k0 = _le64toh(r_uint64(new_k0))
+    k1 = _le64toh(r_uint64(new_k1))
+    if test_misaligned_path:
+        misaligned_is_fine = False
+    yield
+    k0, k1, misaligned_is_fine = old
+
+def get_current_seed():
+    return _le64toh(k0), _le64toh(k1)
+
+
+magic0 = r_uint64(0x736f6d6570736575)
+magic1 = r_uint64(0x646f72616e646f6d)
+magic2 = r_uint64(0x6c7967656e657261)
+magic3 = r_uint64(0x7465646279746573)
+
+
+@always_inline
+def _rotate(x, b):
+    return (x << b) | (x >> (64 - b))
+
+@always_inline
+def _half_round(a, b, c, d, s, t):
+    a += b
+    c += d
+    b = _rotate(b, s) ^ a
+    d = _rotate(d, t) ^ c
+    a = _rotate(a, 32)
+    return a, b, c, d
+
+@always_inline
+def _double_round(v0, v1, v2, v3):
+    v0,v1,v2,v3 = _half_round(v0,v1,v2,v3,13,16)
+    v2,v1,v0,v3 = _half_round(v2,v1,v0,v3,17,21)
+    v0,v1,v2,v3 = _half_round(v0,v1,v2,v3,13,16)
+    v2,v1,v0,v3 = _half_round(v2,v1,v0,v3,17,21)
+    return v0, v1, v2, v3
+
+
+def siphash24(ptr, size):
+    """Takes a CCHARP pointer and a size.  Returns the hash as a r_uint64,
+    which can then be casted to the expected type."""
+
+    addr_in = llmemory.cast_ptr_to_adr(ptr)
+    direct = (misaligned_is_fine or
+                 (rffi.cast(lltype.Signed, addr_in) & 7) == 0)
+
+    b = r_uint64(size) << 56
+    v0 = k0 ^ magic0
+    v1 = k1 ^ magic1
+    v2 = k0 ^ magic2
+    v3 = k1 ^ magic3
+
+    index = 0
+    if direct:
+        while size >= 8:
+            mi = llop.raw_load(rffi.ULONGLONG, addr_in, index)
+            mi = _le64toh(mi)
+            size -= 8
+            index += 8
+            v3 ^= mi
+            v0, v1, v2, v3 = _double_round(v0, v1, v2, v3)
+            v0 ^= mi
+    else:
+        while size >= 8:
+            mi = (
+                r_uint64(llop.raw_load(rffi.UCHAR, addr_in, index)) |
+                r_uint64(llop.raw_load(rffi.UCHAR, addr_in, index + 1)) << 8 |
+                r_uint64(llop.raw_load(rffi.UCHAR, addr_in, index + 2)) << 16 |
+                r_uint64(llop.raw_load(rffi.UCHAR, addr_in, index + 3)) << 24 |
+                r_uint64(llop.raw_load(rffi.UCHAR, addr_in, index + 4)) << 32 |
+                r_uint64(llop.raw_load(rffi.UCHAR, addr_in, index + 5)) << 40 |
+                r_uint64(llop.raw_load(rffi.UCHAR, addr_in, index + 6)) << 48 |
+                r_uint64(llop.raw_load(rffi.UCHAR, addr_in, index + 7)) << 56
+            )
+            mi = _le64toh(mi)
+            size -= 8
+            index += 8
+            v3 ^= mi
+            v0, v1, v2, v3 = _double_round(v0, v1, v2, v3)
+            v0 ^= mi
+
+    t = r_uint64(0)
+    if size == 7:
+        t = r_uint64(llop.raw_load(rffi.UCHAR, addr_in, index + 6)) << 48
+        size = 6
+    if size == 6:
+        t |= r_uint64(llop.raw_load(rffi.UCHAR, addr_in, index + 5)) << 40
+        size = 5
+    if size == 5:
+        t |= r_uint64(llop.raw_load(rffi.UCHAR, addr_in, index + 4)) << 32
+        size = 4
+    if size == 4:
+        if direct:
+            t |= r_uint64(llop.raw_load(rffi.UINT, addr_in, index))
+            size = 0
+        else:
+            t |= r_uint64(llop.raw_load(rffi.UCHAR, addr_in, index + 3)) << 24
+            size = 3
+    if size == 3:
+        t |= r_uint64(llop.raw_load(rffi.UCHAR, addr_in, index + 2)) << 16
+        size = 2
+    if size == 2:
+        t |= r_uint64(llop.raw_load(rffi.UCHAR, addr_in, index + 1)) << 8
+        size = 1
+    if size == 1:
+        t |= r_uint64(llop.raw_load(rffi.UCHAR, addr_in, index))
+        size = 0
+    assert size == 0
+
+    b |= _le64toh(t)
+
+    v3 ^= b
+    v0, v1, v2, v3 = _double_round(v0, v1, v2, v3)
+    v0 ^= b
+    v2 ^= 0xff
+    v0, v1, v2, v3 = _double_round(v0, v1, v2, v3)
+    v0, v1, v2, v3 = _double_round(v0, v1, v2, v3)
+
+    return (v0 ^ v1) ^ (v2 ^ v3)
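
The loop above is a straight SipHash-2-4: one _double_round (i.e. two
SipHash rounds) per 8-byte message word, then v2 ^= 0xff and two more
_double_rounds for finalization.  For sanity-checking outside of RPython,
here is a pure-Python mirror of the little-endian path; everything below
(including the name siphash24_ref) is an illustration written for
Python 3, not part of the commit:

    _MASK = (1 << 64) - 1

    def _rotl(x, b):
        return ((x << b) | (x >> (64 - b))) & _MASK

    def _half_round(a, b, c, d, s, t):
        a = (a + b) & _MASK
        c = (c + d) & _MASK
        b = _rotl(b, s) ^ a
        d = _rotl(d, t) ^ c
        a = _rotl(a, 32)
        return a, b, c, d

    def _double_round(v0, v1, v2, v3):
        v0, v1, v2, v3 = _half_round(v0, v1, v2, v3, 13, 16)
        v2, v1, v0, v3 = _half_round(v2, v1, v0, v3, 17, 21)
        v0, v1, v2, v3 = _half_round(v0, v1, v2, v3, 13, 16)
        v2, v1, v0, v3 = _half_round(v2, v1, v0, v3, 17, 21)
        return v0, v1, v2, v3

    def siphash24_ref(data, k0, k1):
        # 'data' is a bytes object, k0/k1 are 64-bit integers.
        b = (len(data) & 0xff) << 56
        v0 = k0 ^ 0x736f6d6570736575
        v1 = k1 ^ 0x646f72616e646f6d
        v2 = k0 ^ 0x6c7967656e657261
        v3 = k1 ^ 0x7465646279746573
        n = len(data) - (len(data) % 8)
        for i in range(0, n, 8):       # full 8-byte words, little-endian
            mi = int.from_bytes(data[i:i+8], 'little')
            v3 ^= mi
            v0, v1, v2, v3 = _double_round(v0, v1, v2, v3)
            v0 ^= mi
        b |= int.from_bytes(data[n:], 'little')   # 0..7 trailing bytes
        v3 ^= b
        v0, v1, v2, v3 = _double_round(v0, v1, v2, v3)
        v0 ^= b
        v2 ^= 0xff
        v0, v1, v2, v3 = _double_round(v0, v1, v2, v3)
        v0, v1, v2, v3 = _double_round(v0, v1, v2, v3)
        return (v0 ^ v1) ^ (v2 ^ v3)

The byte-by-byte fallback in the RPython version exists only because
unaligned 8-byte loads are not safe on every platform; it assembles
exactly the same little-endian words as the direct raw_load path.
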
diff --git a/rpython/rlib/test/test_rsiphash.py b/rpython/rlib/test/test_rsiphash.py
new file mode 100644
--- /dev/null
+++ b/rpython/rlib/test/test_rsiphash.py
@@ -0,0 +1,44 @@
+from rpython.rlib.rsiphash import siphash24, choosen_seed
+from rpython.rtyper.lltypesystem import rffi
+
+
+CASES = [
+    (2323638336262702335 , ""),
+    (5150479602681463644 , "h"),
+    (1013213613370725794 , "he"),
+    (7028032310911240238 , "hel"),
+    (9535960132410784494 , "hell"),
+    (3256502711089771242 , "hello"),
+    (2389188832234450176 , "hello "),
+    (13253855839845990393, "hello w"),
+    (7850036019043917323 , "hello wo"),
+    (14283308628425005953, "hello wor"),
+    (9605549962279590084 , "hello worl"),
+    (16371281469632894235, "hello world"),
+    (7298637955795769949 , "hello world\x9a"),
+    (13530878135053370821, "hello world\xf3\x80"),
+    (1643533543579802994 , "\xffhel\x82lo world\xbc"),
+    (14632093238728197380, "hexlylxox rewqw"),
+    (3434253029196696424 , "hexlylxox rewqws"),
+    (9855754545877066788 , "hexlylxox rewqwsv"),
+    (5233065012564472454 , "hexlylxox rewqwkashdw89"),
+    (16768585622569081808, "hexlylxox rewqwkeashdw89"),
+    (17430482483431293463, "HEEExlylxox rewqwkashdw89"),
+    (695783005783737705  , "hello woadwealidewd 3829ez 32ig dxwaebderld"),
+]
+
+def check(s):
+    p = rffi.str2charp(s)
+    q = rffi.str2charp('?' + s)
+    with choosen_seed(0x8a9f065a358479f4, 0x11cb1e9ee7f40e1f,
+                      test_misaligned_path=True):
+        x = siphash24(p, len(s))
+        y = siphash24(rffi.ptradd(q, 1), len(s))
+    rffi.free_charp(p)
+    rffi.free_charp(q)
+    assert x == y
+    return x
+
+def test_siphash24():
+    for expected, string in CASES:
+        assert check(string) == expected
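
Assuming the pure-Python mirror sketched after the first file is faithful,
this CASES table can be regenerated outside of RPython along these lines
(an illustration for a Python 3 session with CASES and siphash24_ref
pasted in, not part of the commit):

    K0, K1 = 0x8a9f065a358479f4, 0x11cb1e9ee7f40e1f  # seed used by check()

    for expected, string in CASES:
        # The test strings are byte strings; latin-1 turns each code point
        # back into the corresponding byte value under Python 3.
        assert siphash24_ref(string.encode('latin-1'), K0, K1) == expected
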
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit
