Repository : ssh://darcs.haskell.org//srv/darcs/ghc

On branch  : master

http://hackage.haskell.org/trac/ghc/changeset/39d766ee942a5395a103e9bfd60bac7062ceb70d

>---------------------------------------------------------------

commit 39d766ee942a5395a103e9bfd60bac7062ceb70d
Author: Ian Lynagh <[email protected]>
Date:   Sat Jun 30 00:45:42 2012 +0100

    Copy Data.HashTable's hashString into our Util module
    
    Data.HashTable is now deprecated and will soon be removed, but
    deSugar/Coverage.lhs uses hashString.

>---------------------------------------------------------------

 compiler/deSugar/Coverage.lhs |    1 -
 compiler/utils/Util.lhs       |   73 ++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 72 insertions(+), 2 deletions(-)

diff --git a/compiler/deSugar/Coverage.lhs b/compiler/deSugar/Coverage.lhs
index c29f39e..2a4486e 100644
--- a/compiler/deSugar/Coverage.lhs
+++ b/compiler/deSugar/Coverage.lhs
@@ -41,7 +41,6 @@ import Trace.Hpc.Mix
 import Trace.Hpc.Util
 
 import BreakArray
-import Data.HashTable   ( hashString )
 import Data.Map (Map)
 import qualified Data.Map as Map
 \end{code}
diff --git a/compiler/utils/Util.lhs b/compiler/utils/Util.lhs
index 0d2f741..9d12946 100644
--- a/compiler/utils/Util.lhs
+++ b/compiler/utils/Util.lhs
@@ -92,7 +92,10 @@ module Util (
         abstractConstr, abstractDataType, mkNoRepType,
 
         -- * Utils for printing C code
-        charToC
+        charToC,
+
+        -- * Hashing
+        hashString,
     ) where
 
 #include "HsVersions.h"
@@ -115,6 +118,7 @@ import System.Directory ( doesDirectoryExist, 
getModificationTime )
 import System.FilePath
 
 import Data.Char        ( isUpper, isAlphaNum, isSpace, chr, ord, isDigit )
+import Data.Int
 import Data.Ratio       ( (%) )
 import Data.Ord         ( comparing )
 import Data.Bits
@@ -1049,3 +1053,70 @@ charToC w =
                          chr (ord '0' + ord c         `mod` 8)]
 \end{code}
 
+%************************************************************************
+%*                                                                      *
+\subsection[Utils-Hashing]{Utils for hashing}
+%*                                                                      *
+%************************************************************************
+
+\begin{code}
+-- | A sample hash function for Strings.  We keep multiplying by the
+-- golden ratio and adding.  The implementation is:
+--
+-- > hashString = foldl' f golden
+-- >   where f m c = fromIntegral (ord c) * magic + hashInt32 m
+-- >         magic = 0xdeadbeef
+--
+-- Where hashInt32 works just as hashInt shown above.
+--
+-- Knuth argues that repeated multiplication by the golden ratio
+-- will minimize gaps in the hash space, and thus it's a good choice
+-- for combining together multiple keys to form one.
+--
+-- Here we know that individual characters c are often small, and this
+-- produces frequent collisions if we use ord c alone.  A
+-- particular problem are the shorter low ASCII and ISO-8859-1
+-- character strings.  We pre-multiply by a magic twiddle factor to
+-- obtain a good distribution.  In fact, given the following test:
+--
+-- > testp :: Int32 -> Int
+-- > testp k = (n - ) . length . group . sort . map hs . take n $ ls
+-- >   where ls = [] : [c : l | l <- ls, c <- ['\0'..'\xff']]
+-- >         hs = foldl' f golden
+-- >         f m c = fromIntegral (ord c) * k + hashInt32 m
+-- >         n = 100000
+--
+-- We discover that testp magic = 0.
+hashString :: String -> Int32
+hashString = foldl' f golden
+   where f m c = fromIntegral (ord c) * magic + hashInt32 m
+         magic = 0xdeadbeef
+
+golden :: Int32
+golden = 1013904242 -- = round ((sqrt 5 - 1) * 2^32) :: Int32
+-- was -1640531527 = round ((sqrt 5 - 1) * 2^31) :: Int32
+-- but that has bad mulHi properties (even adding 2^32 to get its inverse)
+-- Whereas the above works well and contains no hash duplications for
+-- [-32767..65536]
+
+-- | A sample (and useful) hash function for Int32,
+-- implemented by extracting the uppermost 32 bits of the 64-bit
+-- result of multiplying by a 33-bit constant.  The constant is from
+-- Knuth, derived from the golden ratio:
+--
+-- > golden = round ((sqrt 5 - 1) * 2^32)
+--
+-- We get good key uniqueness on small inputs
+-- (a problem with previous versions):
+--  (length $ group $ sort $ map hashInt32 [-32767..65536]) == 65536 + 32768
+--
+hashInt32 :: Int32 -> Int32
+hashInt32 x = mulHi x golden + x
+
+-- hi 32 bits of a x-bit * 32 bit -> 64-bit multiply
+mulHi :: Int32 -> Int32 -> Int32
+mulHi a b = fromIntegral (r `shiftR` 32)
+   where r :: Int64
+         r = fromIntegral a * fromIntegral b
+\end{code}
+



_______________________________________________
Cvs-ghc mailing list
[email protected]
http://www.haskell.org/mailman/listinfo/cvs-ghc

Reply via email to