wingo pushed a commit to branch main
in repository guile.

commit 5959531c54d1a164e638731b8d79633f454a3dbd
Author: Andy Wingo <wi...@pobox.com>
AuthorDate: Mon Nov 20 14:07:03 2023 +0100

    Allow target runtime to override symbol hash
    
    Also rework so that the symbol hash uses the low bits instead of high
    bits.  We can do this because, for the guile-vm target, now we compute
    the full target hash.
    
    * module/language/cps/guile-vm.scm (jenkins-lookup3-hashword2):
    (target-symbol-hash, target-symbol-hash-bits): New exported functions..
    * module/language/cps/switch.scm (optimize-branch-chain): Change to use
    target-symbol-hash and target-symbol-hash-bits from the current
    target-runtime.
---
 libguile/hash.c                  |  4 ++-
 module/language/cps/guile-vm.scm | 71 +++++++++++++++++++++++++++++++++++++++-
 module/language/cps/switch.scm   | 63 ++++++++++++++++-------------------
 3 files changed, 102 insertions(+), 36 deletions(-)

diff --git a/libguile/hash.c b/libguile/hash.c
index 5abdfe397..a038a11bf 100644
--- a/libguile/hash.c
+++ b/libguile/hash.c
@@ -1,4 +1,4 @@
-/* Copyright 1995-1997,2000-2001,2003-2004,2006,2008-2015,2017-2018,2020
+/* Copyright 1995-1997,2000-2001,2003-2004,2006,2008-2015,2017-2018,2020,2023
      Free Software Foundation, Inc.
 
    This file is part of Guile.
@@ -136,6 +136,8 @@ wide_string_hash (const scm_t_wchar *str, size_t len)
   return ret;
 }
 
+/* If you change this to a different hash, also update (language cps
+   guile-vm).  */
 unsigned long
 scm_i_string_hash (SCM str)
 {
diff --git a/module/language/cps/guile-vm.scm b/module/language/cps/guile-vm.scm
index f330128f2..772783349 100644
--- a/module/language/cps/guile-vm.scm
+++ b/module/language/cps/guile-vm.scm
@@ -27,8 +27,77 @@
   #:use-module (language cps guile-vm loop-instrumentation)
   #:use-module (language cps guile-vm lower-primcalls)
   #:use-module (language cps guile-vm reify-primitives)
+  #:use-module (system base target)
   #:export (make-lowerer
-            available-optimizations))
+            available-optimizations
+            target-symbol-hash
+            target-symbol-hash-bits))
+
+;; This hash function is originally from
+;; http://burtleburtle.net/bob/c/lookup3.c by Bob Jenkins, May 2006,
+;; Public Domain.  No warranty.
+(define (jenkins-lookup3-hashword2 str)
+  (define (u32 x) (logand x #xffffFFFF))
+  (define (shl x n) (u32 (ash x n)))
+  (define (shr x n) (ash x (- n)))
+  (define (rot x n) (logior (shl x n) (shr x (- 32 n))))
+  (define (add x y) (u32 (+ x y)))
+  (define (sub x y) (u32 (- x y)))
+  (define (xor x y) (logxor x y))
+
+  (define (mix a b c)
+    (let* ((a (sub a c)) (a (xor a (rot c 4)))  (c (add c b))
+           (b (sub b a)) (b (xor b (rot a 6)))  (a (add a c))
+           (c (sub c b)) (c (xor c (rot b 8)))  (b (add b a))
+           (a (sub a c)) (a (xor a (rot c 16))) (c (add c b))
+           (b (sub b a)) (b (xor b (rot a 19))) (a (add a c))
+           (c (sub c b)) (c (xor c (rot b 4)))  (b (add b a)))
+      (values a b c)))
+  (define (final a b c)
+    (let* ((c (xor c b)) (c (sub c (rot b 14)))
+           (a (xor a c)) (a (sub a (rot c 11)))
+           (b (xor b a)) (b (sub b (rot a 25)))
+           (c (xor c b)) (c (sub c (rot b 16)))
+           (a (xor a c)) (a (sub a (rot c 4)))
+           (b (xor b a)) (b (sub b (rot a 14)))
+           (c (xor c b)) (c (sub c (rot b 24))))
+      (values a b c)))
+
+  (define len (string-length str))
+  (define (add-char x index)
+    (add x (char->integer (string-ref str index))))
+
+  (let ((init (add #xdeadbeef (add (shl len 2) 47))))
+    (let lp ((i 0) (a init) (b init) (c init))
+      (let ((remaining (- len i)))
+        (cond
+         ((< 3 remaining)
+          (call-with-values (lambda ()
+                              (mix (add-char a i)
+                                   (add-char b (+ i 1))
+                                   (add-char c (+ i 2))))
+            (lambda (a b c)
+              (lp (+ i 3) a b c))))
+         (else
+          (let* ((a (if (<= 1 remaining) (add-char a i) a))
+                 (b (if (<= 2 remaining) (add-char b (+ i 1)) b))
+                 (c (if (<= 3 remaining) (add-char c (+ i 2)) c)))
+            (final a b c))))))))
+
+(define (target-symbol-hash str)
+  (call-with-values (lambda () (jenkins-lookup3-hashword2 str))
+    (lambda (a b c)
+      ;; The high 32 bits of the hash on a 64-bit platform are
+      ;; equivalent to the hash on a 32-bit platform.  The top two bits
+      ;; are zero to allow the hash to fit in a fixnum.
+      (ash (case (target-word-size)
+             ((4) c)
+             ((8) (logior (ash c 32) b))
+             (else (error "unexpected target word size" (target-word-size))))
+           -2))))
+
+(define target-symbol-hash-bits
+  (- (* (target-word-size) 8) 2))
 
 (define (make-lowerer optimization-level opts)
   (lambda (exp env)
diff --git a/module/language/cps/switch.scm b/module/language/cps/switch.scm
index f4ae40567..84c5d0f77 100644
--- a/module/language/cps/switch.scm
+++ b/module/language/cps/switch.scm
@@ -148,8 +148,8 @@ object."
             (hash-table->alist specials)
             (hash-table->sorted-alist symbols
                                       (lambda (s1 s2)
-                                        (< (symbol-hash s1)
-                                           (symbol-hash s2))))
+                                        (string< (symbol->string s1)
+                                                 (symbol->string s2))))
             (hash-table->alist others))))
 
 ;; Leave any chain this long or less as is.
@@ -320,44 +320,39 @@ object."
      ((null? targets)
       (with-cps cps next))
      ((length>? targets *symbol-hash-dispatch-min-length*)
-      ;; Hash dispatch.  Targets already sorted by symbol-hash.  The
-      ;; symbol-hash accessor returns the hash of a symbol, which is the
-      ;; hash of its associated stringbuf.  The high 32 bits of the hash
-      ;; on a 64-bit platform are equivalent to the hash on a 32-bit
-      ;; platform.  The top two bits are zero, to ensure that hash
-      ;; values can be represented as fixnums.  We therefore dispatch on
-      ;; the top N bits, skipping 2 bits, where N <= 30, for the
-      ;; smallest N for which len(targets) <= 2^N.
-      (let* ((nbits (let ((ntargets (length targets)))
+      ;; Hash dispatch.  The value has symbol-hash-bits significant
+      ;; bits.  We dispatch on the bottom N bits of the significant
+      ;; bits, where N <= symbol-hash-bits, for the smallest N for which
+      ;; len(targets) <= 2^N.
+      (let* ((backend (resolve-interface `(language cps ,(target-runtime))))
+             (symbol-hash (module-ref backend 'target-symbol-hash))
+             (symbol-hash-bits (module-ref backend 'target-symbol-hash-bits))
+             (nbits (let ((ntargets (length targets)))
                       (let lp ((nbits 2))
-                        (if (<= ntargets (ash 1 nbits))
-                            nbits
-                            (lp (1+ nbits))))))
-             (host-shift (- (* (with-native-target target-word-size) 8) 2 
nbits))
-             (target-shift (- (* (target-word-size) 8) 2 nbits))
+                        (cond
+                         ((= nbits symbol-hash-bits) nbits)
+                         ((<= ntargets (ash 1 nbits)) nbits)
+                         (else (lp (1+ nbits)))))))
              (nbuckets (ash 1 nbits))
              (buckets (make-vector nbuckets '()))
              (kt* (make-vector nbuckets exit)))
-        (define (next-targets targets next-bucket)
-          (let lp ((out '()) (targets targets))
-            (match targets
-              (() (values out targets))
-              (((sym . k) . targets*)
-               (if (< (symbol-hash sym) next-bucket)
-                   (lp (acons sym k out) targets*)
-                   (values out targets))))))
-        (let lp ((cps cps) (i 0) (targets targets))
+        (define (symbol->bucket sym)
+          (logand (1- nbuckets) (symbol-hash (symbol->string sym))))
+        (define (vector-push! v i x)
+          (vector-set! v i (cons x (vector-ref v i))))
+        (for-each (match-lambda
+                    ((and pair (sym . target))
+                     (vector-push! buckets (symbol->bucket sym) pair)))
+                  targets)
+        (let lp ((cps cps) (i 0))
           (cond
            ((< i nbuckets)
             (call-with-values (lambda ()
-                                (next-targets targets (ash (1+ i) host-shift)))
-              (lambda (bucket targets)
-                (call-with-values
-                    (lambda ()
-                      (reify-chain cps var bucket 'eq-constant? exit))
-                  (lambda (cps k)
-                    (vector-set! kt* i k)
-                    (lp cps (1+ i) targets))))))
+                                (reify-chain cps var (vector-ref buckets i)
+                                             'eq-constant? exit))
+              (lambda (cps k)
+                (vector-set! kt* i k)
+                (lp cps (1+ i)))))
            (else
             (with-cps cps
               (letv hash idx)
@@ -367,7 +362,7 @@ object."
               (letk kidx
                     ($kargs ('hash) (hash)
                       ($continue kswitch #f
-                        ($primcall 'ursh/immediate target-shift (hash)))))
+                        ($primcall 'ulogand/immediate (1- nbuckets) (hash)))))
               (letk khash
                     ($kargs () ()
                       ($continue kidx #f

Reply via email to