Hello!

Mark H Weaver <m...@netris.org> skribis:

> l...@gnu.org (Ludovic Courtès) writes:
>> As incredible as it may seem, ‘hash’ until now always returned 263 % n
>> for structs, leading to interesting experiences when using structs as
>> hash table keys.
>
> Yes, do you remember us talking about this long ago on IRC?  I wanted to
> fix this, but asked whether changing the hash function was okay for 2.0,
> and you never gave me an answer :)

I don’t remember, but I’m glad we agree that something must be done.
It’s also a sign that email is better than IRC for these things, as far
as I’m concerned.  ;-)

> Andy said that he improved the hash function on the master branch.
> You might want to look at what he did.

Thanks for the reminder.  I just looked, it’s much nicer, but it doesn’t
address this particular problem, so we could port it there afterward.

> I guess this 'if' is to avoid an infinite loop if the struct points back
> to itself.  However, it apparently fails to detect cycles in the general
> case.

Yes, indeed.

Here’s an updated patch that uses the ‘depth’ argument of ‘scm_hasher’
for that, as is done for pairs.

Thanks for the review!

Ludo’.

diff --git a/libguile/hash.c b/libguile/hash.c
index a79f03d..8b00a0c 100644
--- a/libguile/hash.c
+++ b/libguile/hash.c
@@ -1,5 +1,6 @@
-/*	Copyright (C) 1995,1996,1997, 2000, 2001, 2003, 2004, 2006, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
- * 
+/* Copyright (C) 1995, 1996, 1997, 2000, 2001, 2003, 2004, 2006, 2008,
+ *   2009, 2010, 2011, 2012 Free Software Foundation, Inc.
+ *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public License
  * as published by the Free Software Foundation; either version 3 of
@@ -223,6 +224,8 @@ scm_hasher(SCM obj, unsigned long n, size_t d)
 	significant_bits = (scm_t_uintptr) SCM_POINTER_VALUE (obj) >> 4UL;
 	return (size_t) significant_bits  % n;
       }
+    case scm_tcs_struct:
+      return scm_i_struct_hash (obj, n, d);
     case scm_tc7_wvect:
     case scm_tc7_vector:
       {
diff --git a/libguile/struct.c b/libguile/struct.c
index 5837b7c..7e8f68c 100644
--- a/libguile/struct.c
+++ b/libguile/struct.c
@@ -922,6 +922,53 @@ scm_struct_ihashq (SCM obj, unsigned long n, void *closure)
   return SCM_UNPACK (obj) % n;
 }
 
+unsigned long
+scm_i_struct_hash (SCM obj, unsigned long n, size_t depth)
+#define FUNC_NAME "hash"
+{
+  SCM layout;
+  scm_t_bits *data;
+  size_t struct_size, field_num;
+  unsigned long hash;
+
+  SCM_VALIDATE_STRUCT (1, obj);
+
+  layout = SCM_STRUCT_LAYOUT (obj);
+  struct_size = scm_i_symbol_length (layout) / 2;
+  data = SCM_STRUCT_DATA (obj);
+
+  hash = SCM_UNPACK (SCM_STRUCT_VTABLE (obj)) % n;
+  if (depth > 0)
+    for (field_num = 0; field_num < struct_size; field_num++)
+      {
+	int protection;
+
+	protection = scm_i_symbol_ref (layout, field_num * 2 + 1);
+	if (protection != 'h' && protection != 'o')
+	  {
+	    int type;
+	    type = scm_i_symbol_ref (layout, field_num * 2);
+	    switch (type)
+	      {
+	      case 'p':
+		hash ^= scm_hasher (SCM_PACK (data[field_num]), n,
+				    depth / 2);
+		break;
+	      case 'u':
+		hash ^= data[field_num] % n;
+		break;
+	      default:
+		/* Ignore 's' fields.  */;
+	      }
+	  }
+      }
+
+  /* FIXME: Tail elements should be taken into account.  */
+
+  return hash % n;
+}
+#undef FUNC_NAME
+
 SCM_DEFINE (scm_struct_vtable_name, "struct-vtable-name", 1, 0, 0, 
             (SCM vtable),
 	    "Return the name of the vtable @var{vtable}.")
diff --git a/libguile/struct.h b/libguile/struct.h
index 3072f24..643fd9d 100644
--- a/libguile/struct.h
+++ b/libguile/struct.h
@@ -193,6 +193,8 @@ SCM_API void scm_print_struct (SCM exp, SCM port, scm_print_state *);
 
 SCM_INTERNAL SCM scm_i_struct_equalp (SCM s1, SCM s2);
 SCM_INTERNAL unsigned long scm_struct_ihashq (SCM, unsigned long, void *);
+SCM_INTERNAL unsigned long scm_i_struct_hash (SCM s, unsigned long n,
+					      size_t depth);
 SCM_INTERNAL SCM scm_i_alloc_struct (scm_t_bits *vtable_data, int n_words);
 SCM_INTERNAL void scm_i_struct_inherit_vtable_magic (SCM vtable, SCM obj);
 SCM_INTERNAL void scm_init_struct (void);
diff --git a/test-suite/tests/structs.test b/test-suite/tests/structs.test
index 431a014..0e3b241 100644
--- a/test-suite/tests/structs.test
+++ b/test-suite/tests/structs.test
@@ -126,7 +126,49 @@
      (not (or (equal? (make-ball red "Bob") (make-ball green "Bob"))
 	      (equal? (make-ball red "Bob") (make-ball red "Bill"))))))
 
+
+(with-test-prefix "hash"
+
+  (pass-if "simple structs"
+    (let* ((v  (make-vtable "pr"))
+           (s1 (make-struct v 0 "hello"))
+           (s2 (make-struct v 0 "hello")))
+      (= (hash s1 7777) (hash s2 7777))))
+
+  (pass-if "different structs"
+    (let* ((v  (make-vtable "pr"))
+           (s1 (make-struct v 0 "hello"))
+           (s2 (make-struct v 0 "world")))
+      (or (not (= (hash s1 7777) (hash s2 7777)))
+          (throw 'unresolved))))
+
+  (pass-if "different struct types"
+    (let* ((v1 (make-vtable "pr"))
+           (v2 (make-vtable "pr"))
+           (s1 (make-struct v1 0 "hello"))
+           (s2 (make-struct v2 0 "hello")))
+      (or (not (= (hash s1 7777) (hash s2 7777)))
+          (throw 'unresolved))))
 
+  (pass-if "more complex structs"
+    (let ((s1 (make-ball red (string-copy "Bob")))
+          (s2 (make-ball red (string-copy "Bob"))))
+      (= (hash s1 7777) (hash s2 7777))))
+
+  (pass-if "struct with weird fields"
+    (let* ((v  (make-vtable "prurph"))
+           (s1 (make-struct v 0 "hello" 123 "invisible-secret1"))
+           (s2 (make-struct v 0 "hello" 123 "invisible-secret2")))
+      (= (hash s1 7777) (hash s2 7777))))
+
+  (pass-if "cyclic structs"
+    (let* ((v (make-vtable "pw"))
+           (a (make-struct v 0 #f))
+           (b (make-struct v 0 a)))
+      (struct-set! a 0 b)
+      (and (hash a 7777) (hash b 7777) #t))))
+
+
 ;;
 ;; make-struct
 ;;

Reply via email to