Repository : ssh://darcs.haskell.org//srv/darcs/packages/containers

On branch  : 

http://hackage.haskell.org/trac/ghc/changeset/9c41e7936f13ee3bad76722cf093dccf994a0361

>---------------------------------------------------------------

commit 9c41e7936f13ee3bad76722cf093dccf994a0361
Author: Joachim Breitner <[email protected]>
Date:   Tue Sep 20 20:31:45 2011 +0200

    Comments and other cleanup

>---------------------------------------------------------------

 Data/DenseIntSet.hs |   54 +++++++++++++++++++++++++++++++++++---------------
 1 files changed, 38 insertions(+), 16 deletions(-)

diff --git a/Data/DenseIntSet.hs b/Data/DenseIntSet.hs
index b762c2c..c4d30c9 100644
--- a/Data/DenseIntSet.hs
+++ b/Data/DenseIntSet.hs
@@ -33,6 +33,12 @@
 --      Information Coded In Alphanumeric/\", Journal of the ACM, 15(4),
 --      October 1968, pages 514-534.
 --
+-- Additionally, this implementation places bitmaps in the leaves of the tree.
+-- Their size is the natural size of a machine word (32 or 64 bits) and greatly
+-- reduce memory footprint and execution times for dense sets, e.g. sets where
+-- it is likely that many values lie close to each other. The asymptotics are
+-- not affected by this optimization.
+--
 -- Many operations have a worst-case complexity of /O(min(n,W))/.
 -- This means that the operation can become linear in the number of
 -- elements with a maximum of /W/ -- the number of bits in an 'Int'
@@ -210,6 +216,9 @@ data IntSet = Bin {-# UNPACK #-} !Prefix {-# UNPACK #-} 
!Mask !IntSet !IntSet
 -- Invariant: In Bin prefix mask left right, left consists of the elements that
 --            don't have the mask bit set; right is all the elements that do.
             | Tip {-# UNPACK #-} !Prefix {-# UNPACK #-} !BitMap
+-- Invariant: The Prefix is zero for all but the last 5 (on 32 bit arches) or 6
+--            bits (on 31 bit arches). The values of the map represented by a 
tip
+--            are the prefix plus the indices of the set bits in the bit map.
             | Nil
 
 
@@ -326,6 +335,7 @@ insertBM kx bm t = kx `seq` bm `seq`
 delete :: Int -> IntSet -> IntSet
 delete x = x `seq` deleteBM (x .&. highBits) (bit (x .&. lowBits))
 
+-- Deletes all values mentioned in the BitMap from the set.
 deleteBM :: Prefix -> BitMap -> IntSet -> IntSet
 deleteBM kx bm t = kx `seq` bm `seq`
   case t of
@@ -586,7 +596,7 @@ splitMember x t
         | kx>kx'      -> (t,False,Nil)
         | kx<kx'      -> (Nil,False,t)
         | otherwise   -> (tip kx' (bm .&.             2^(bi-1)-1)
-                         , bm `testBit` bi
+                         ,bm `testBit` bi
                          ,tip kx' (bm .&. complement (2^bi-1)))
       Nil -> (Nil,False,Nil)
   where kx = x .&. highBits
@@ -604,7 +614,7 @@ splitMember' x t
         | kx>kx'      -> (t,False,Nil)
         | kx<kx'      -> (Nil,False,t)
         | otherwise   -> (tip kx' (bm .&.             2^(bi-1)-1)
-                         , bm `testBit` bi
+                         ,bm `testBit` bi
                          ,tip kx' (bm .&. complement (2^bi-1)))
       Nil -> (Nil,False,Nil)
   where kx = x .&. highBits
@@ -832,6 +842,8 @@ fromDistinctAscList :: [Int] -> IntSet
 fromDistinctAscList []         = Nil
 fromDistinctAscList (z0 : zs0) = work (z0 .&. highBits) (bit (z0 .&. lowBits)) 
zs0 Nada
   where
+    -- 'work' accumulates all values that go into one tip, before passing this 
Tip
+    -- to 'reduce'
     work kx bm []     stk = finish kx (Tip kx bm) stk
     work kx bm (z:zs) stk | kx == z .&. highBits = work kx (bm `setBit` (z .&. 
lowBits)) zs stk
     work kx bm (z:zs) stk = reduce z zs (branchMask z kx) kx (Tip kx bm) stk
@@ -957,7 +969,7 @@ showsTree wide lbars rbars t
              showWide wide lbars .
              showsTree wide (withEmpty lbars) (withBar lbars) l
       Tip kx bm
-          -> showsBars lbars . showString " " . shows kx . showString " " .
+          -> showsBars lbars . showString " " . shows kx . showString " + " .
                                                 showsBitMap bm . showString 
"\n" 
       Nil -> showsBars lbars . showString "|\n"
 
@@ -971,7 +983,7 @@ showsTreeHang wide bars t
              showWide wide bars .
              showsTreeHang wide (withEmpty bars) r
       Tip kx bm
-          -> showsBars bars . showString " " . shows kx . showString " " .
+          -> showsBars bars . showString " " . shows kx . showString " + " .
                                                showsBitMap bm . showString 
"\n" 
       Nil -> showsBars bars . showString "|\n" 
 
@@ -994,7 +1006,7 @@ showsBitMap :: Word -> ShowS
 showsBitMap = showString . showBitMap
 
 showBitMap :: Word -> String
-showBitMap w = concat [ if w `testBit` i then "1" else "0" | i <- reverse 
lowBitsList ]
+showBitMap w = show $ foldrBits 0 (:) [] w
 
 node :: String
 node           = "+--"
@@ -1136,7 +1148,7 @@ highestBitMask x0
 {----------------------------------------------------------------------
   [highBits] and [lowBits] are two bit masks. Low bits has just the last n bits
   set to 1, while high bits is the complement, and n is the word size of the
-  architecture.
+  architecture. TODO: Does this work on s390?
 ----------------------------------------------------------------------}
 
 lowBits :: Int
@@ -1145,18 +1157,22 @@ lowBits = (bitSize (undefined::Word))-1
 highBits :: Int
 highBits = complement lowBits
 
-lowBitsList :: [Int]
-lowBitsList = [0 .. bitSize (undefined::Word)-1]
+{----------------------------------------------------------------------
+  Folds over bitmaps. These are crucial for good speed in toList, filter,
+  partition. Futher optimization is welcome.
+----------------------------------------------------------------------}
 
 foldlBits :: Int -> (a -> Int -> a) -> a -> Word -> a
-foldlBits shift f = go shift
+foldlBits shift f x bm = let lb = lowestBitSet bm 
+                         in  go (shift+lb) x (bm `shiftRL` lb)
   where STRICT_2_OF_3(go)
         go bi acc 0 = acc
         go bi acc n | n `testBit` 0 = go (bi + 1) (f acc bi) (n `shiftRL` 1)
                     | otherwise     = go (bi + 1)    acc     (n `shiftRL` 1)
 
 foldl'Bits :: Int -> (a -> Int -> a) -> a -> Word -> a
-foldl'Bits shift f = go shift
+foldl'Bits shift f x bm = let lb = lowestBitSet bm 
+                          in  go (shift+lb) x (bm `shiftRL` lb)
   where STRICT_1_OF_3(go)
         STRICT_2_OF_3(go)
         go bi acc 0 = acc
@@ -1172,11 +1188,22 @@ foldrBits shift f x bm = let lb = lowestBitSet bm
                 | otherwise     =       go (bi + 1) (n `shiftRL` 1)
 
 foldr'Bits :: Int -> (Int -> a -> a) -> a -> Word -> a
-foldr'Bits shift f x = go shift
+foldr'Bits shift f x bm = let lb = lowestBitSet bm 
+                          in  go (shift+lb) (bm `shiftRL` lb)
   where go bi 0 = x
         go bi n | n `testBit` 0 = f bi $! go (bi + 1) (n `shiftRL` 1)
                 | otherwise     =         go (bi + 1) (n `shiftRL` 1)
 
+{----------------------------------------------------------------------
+Finds the index of the lowest resp. highest bit set in a word. The following
+code works fine for bit sizes up to 64. A possibly faster but
+wordsize-dependant implementation based on multiplication and DeBrujn indeces
+is proposed by Edward Kmett
+<http://haskell.org/pipermail/libraries/2011-September/016749.html>
+Some architectures, notably x86, also offer machine instructions for this
+operation (bsr and bsl).
+----------------------------------------------------------------------}
+
 lowestBitSet :: Word -> Int
 lowestBitSet n0 =
     let (n1,b1) = if n0 .&. 0xFFFFFFFF /= 0 then (n0,0)  else (n0 `shiftRL` 
32, 32)
@@ -1200,11 +1227,6 @@ highestBitSet n0 =
 {-# INLINE highestBitSet #-}
 
 {----------------------------------------------------------------------
-   [wordMask] sets the last n bits to zero, where n is the word size of the
-   architecture
-----------------------------------------------------------------------}
-
-{----------------------------------------------------------------------
   [bitcount] as posted by David F. Place to haskell-cafe on April 11, 2006,
   based on the code on
   http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetKernighan,



_______________________________________________
Cvs-libraries mailing list
[email protected]
http://www.haskell.org/mailman/listinfo/cvs-libraries

Reply via email to