Hello! Mark H Weaver <m...@netris.org> skribis:
> l...@gnu.org (Ludovic Courtès) writes: >> As incredible as it may seem, ‘hash’ until now always returned 263 % n >> for structs, leading to interesting experiences when using structs as >> hash table keys. > > Yes, do you remember us talking about this long ago on IRC? I wanted to > fix this, but asked whether changing the hash function was okay for 2.0, > and you never gave me an answer :) I don’t remember, but I’m glad we agree that something must be done. It’s also a sign that email is better than IRC for these things, as far as I’m concerned. ;-) > Andy said that he improved the hash function on the master branch. > You might want to look at what he did. Thanks for the reminder. I just looked, it’s much nicer, but it doesn’t address this particular problem, so we could port it there afterward. > I guess this 'if' is to avoid an infinite loop if the struct points back > to itself. However, it apparently fails to detect cycles in the general > case. Yes, indeed. Here’s an updated patch that uses the ‘depth’ argument of ‘scm_hasher’ for that, as is done for pairs. Thanks for the review! Ludo’.
diff --git a/libguile/hash.c b/libguile/hash.c index a79f03d..8b00a0c 100644 --- a/libguile/hash.c +++ b/libguile/hash.c @@ -1,5 +1,6 @@ -/* Copyright (C) 1995,1996,1997, 2000, 2001, 2003, 2004, 2006, 2008, 2009, 2010, 2011 Free Software Foundation, Inc. - * +/* Copyright (C) 1995, 1996, 1997, 2000, 2001, 2003, 2004, 2006, 2008, + * 2009, 2010, 2011, 2012 Free Software Foundation, Inc. + * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License * as published by the Free Software Foundation; either version 3 of @@ -223,6 +224,8 @@ scm_hasher(SCM obj, unsigned long n, size_t d) significant_bits = (scm_t_uintptr) SCM_POINTER_VALUE (obj) >> 4UL; return (size_t) significant_bits % n; } + case scm_tcs_struct: + return scm_i_struct_hash (obj, n, d); case scm_tc7_wvect: case scm_tc7_vector: { diff --git a/libguile/struct.c b/libguile/struct.c index 5837b7c..7e8f68c 100644 --- a/libguile/struct.c +++ b/libguile/struct.c @@ -922,6 +922,53 @@ scm_struct_ihashq (SCM obj, unsigned long n, void *closure) return SCM_UNPACK (obj) % n; } +unsigned long +scm_i_struct_hash (SCM obj, unsigned long n, size_t depth) +#define FUNC_NAME "hash" +{ + SCM layout; + scm_t_bits *data; + size_t struct_size, field_num; + unsigned long hash; + + SCM_VALIDATE_STRUCT (1, obj); + + layout = SCM_STRUCT_LAYOUT (obj); + struct_size = scm_i_symbol_length (layout) / 2; + data = SCM_STRUCT_DATA (obj); + + hash = SCM_UNPACK (SCM_STRUCT_VTABLE (obj)) % n; + if (depth > 0) + for (field_num = 0; field_num < struct_size; field_num++) + { + int protection; + + protection = scm_i_symbol_ref (layout, field_num * 2 + 1); + if (protection != 'h' && protection != 'o') + { + int type; + type = scm_i_symbol_ref (layout, field_num * 2); + switch (type) + { + case 'p': + hash ^= scm_hasher (SCM_PACK (data[field_num]), n, + depth / 2); + break; + case 'u': + hash ^= data[field_num] % n; + break; + default: + /* Ignore 's' fields. */; + } + } + } + + /* FIXME: Tail elements should be taken into account. */ + + return hash % n; +} +#undef FUNC_NAME + SCM_DEFINE (scm_struct_vtable_name, "struct-vtable-name", 1, 0, 0, (SCM vtable), "Return the name of the vtable @var{vtable}.") diff --git a/libguile/struct.h b/libguile/struct.h index 3072f24..643fd9d 100644 --- a/libguile/struct.h +++ b/libguile/struct.h @@ -193,6 +193,8 @@ SCM_API void scm_print_struct (SCM exp, SCM port, scm_print_state *); SCM_INTERNAL SCM scm_i_struct_equalp (SCM s1, SCM s2); SCM_INTERNAL unsigned long scm_struct_ihashq (SCM, unsigned long, void *); +SCM_INTERNAL unsigned long scm_i_struct_hash (SCM s, unsigned long n, + size_t depth); SCM_INTERNAL SCM scm_i_alloc_struct (scm_t_bits *vtable_data, int n_words); SCM_INTERNAL void scm_i_struct_inherit_vtable_magic (SCM vtable, SCM obj); SCM_INTERNAL void scm_init_struct (void); diff --git a/test-suite/tests/structs.test b/test-suite/tests/structs.test index 431a014..0e3b241 100644 --- a/test-suite/tests/structs.test +++ b/test-suite/tests/structs.test @@ -126,7 +126,49 @@ (not (or (equal? (make-ball red "Bob") (make-ball green "Bob")) (equal? (make-ball red "Bob") (make-ball red "Bill")))))) + +(with-test-prefix "hash" + + (pass-if "simple structs" + (let* ((v (make-vtable "pr")) + (s1 (make-struct v 0 "hello")) + (s2 (make-struct v 0 "hello"))) + (= (hash s1 7777) (hash s2 7777)))) + + (pass-if "different structs" + (let* ((v (make-vtable "pr")) + (s1 (make-struct v 0 "hello")) + (s2 (make-struct v 0 "world"))) + (or (not (= (hash s1 7777) (hash s2 7777))) + (throw 'unresolved)))) + + (pass-if "different struct types" + (let* ((v1 (make-vtable "pr")) + (v2 (make-vtable "pr")) + (s1 (make-struct v1 0 "hello")) + (s2 (make-struct v2 0 "hello"))) + (or (not (= (hash s1 7777) (hash s2 7777))) + (throw 'unresolved)))) + (pass-if "more complex structs" + (let ((s1 (make-ball red (string-copy "Bob"))) + (s2 (make-ball red (string-copy "Bob")))) + (= (hash s1 7777) (hash s2 7777)))) + + (pass-if "struct with weird fields" + (let* ((v (make-vtable "prurph")) + (s1 (make-struct v 0 "hello" 123 "invisible-secret1")) + (s2 (make-struct v 0 "hello" 123 "invisible-secret2"))) + (= (hash s1 7777) (hash s2 7777)))) + + (pass-if "cyclic structs" + (let* ((v (make-vtable "pw")) + (a (make-struct v 0 #f)) + (b (make-struct v 0 a))) + (struct-set! a 0 b) + (and (hash a 7777) (hash b 7777) #t)))) + + ;; ;; make-struct ;;