On Tue, Jan 05, 2010 at 04:40:55PM -0800, Bryan O'Sullivan wrote:
> You've got an extra level of indirection there due to the use of data
> instead of newtype, so you're paying an additional boxing penalty for
> everything except your first case. Are you compiling with
> -funbox-strict-fields?

I've changed those data's to newtype's but using words still
seems better, unless mapping and folding over bytes is more
important.  In that case maybe storing the bytes separately might
be better.  Maybe a more complex/realistic benchmark?

I've also rerun the benchmark in a x86-32 chroot.  In this
environment Word32 seems to win over Word64.  But, who cares
about 32 bits anyway? ;)

I'm attaching the source code and the summary results.
Everything was compiled with 'ghc -fforce-recomp -O3'.

--
Felipe.
{-# LANGUAGE RankNTypes #-}

{-
    This program benchmarks several different ways to store 16 bytes in type.
-}

import Criterion.Main
import System.IO (hSetBuffering, BufferMode(..), stdout)

import Data.Bits
import Data.List
import Data.Maybe
import Data.Word

import qualified Data.Array         as A
import qualified Data.Array.Unboxed as U
import qualified Data.Array.Vector  as V
import qualified Data.ByteString    as B

data InBytes = BY !Word8 !Word8 !Word8 !Word8 !Word8 !Word8 !Word8 !Word8
                  !Word8 !Word8 !Word8 !Word8 !Word8 !Word8 !Word8 !Word8
    deriving (Eq, Ord)

data InWords = WO !Word32 !Word32 !Word32 !Word32
    deriving (Eq, Ord)

data InWords64 = WO64 !Word64 !Word64
    deriving (Eq, Ord)

newtype InList       = LI [Word8]              deriving (Eq, Ord)
newtype InByteString = BS B.ByteString         deriving (Eq, Ord)
newtype InArray      = AR (A.Array Int Word8)  deriving (Eq, Ord)
newtype InUArray     = UA (U.UArray Int Word8) deriving (Eq, Ord)
newtype InVector     = VE (V.UArr Word8)       deriving (Eq)

instance Ord InVector where
    compare (VE va) (VE vb) = V.foldlU lexo EQ $ V.zipU va vb
        where lexo EQ (a V.:*: b) = compare a b
              lexo r  _         = r


class TestSubject d where
    toList      :: d -> [Word8]
    fromList    :: [Word8] -> Maybe d
    mapBytes    :: (Word8 -> a) -> d -> [a]
    foldBytes   :: (a -> Word8 -> a) -> a -> d -> a
    unfoldBytes :: (a -> Maybe (Word8, a)) -> a -> Maybe d
    build       :: Word8 -> Word8 -> Word8 -> Word8
                -> Word8 -> Word8 -> Word8 -> Word8
                -> Word8 -> Word8 -> Word8 -> Word8
                -> Word8 -> Word8 -> Word8 -> Word8 -> d
    build b0 b1 b2 b3 b4 b5 b6 b7 b8 b9 ba bb bc bd be bf =
       d [b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf]
       where d = fromJust . fromList


instance TestSubject InBytes where
    toList (BY b0 b1 b2 b3 b4 b5 b6 b7 b8 b9 ba bb bc bd be bf) =
        [b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf]

    fromList [b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf] =
        Just $ BY b0 b1 b2 b3 b4 b5 b6 b7 b8 b9 ba bb bc bd be bf
    fromList _ = Nothing

    mapBytes f (BY b0 b1 b2 b3 b4 b5 b6 b7 b8 b9 ba bb bc bd be bf) =
        [f b0, f b1, f b2, f b3, f b4, f b5, f b6, f b7,
         f b8, f b9, f ba, f bb, f bc, f bd, f be, f bf]

    foldBytes f z (BY b0 b1 b2 b3 b4 b5 b6 b7 b8 b9 ba bb bc bd be bf) =
        f' bf $ f' be $ f' bd $ f' bc $ f' bb $ f' ba $ f' b9 $ f' b8 $
        f' b7 $ f' b6 $ f' b5 $ f' b4 $ f' b3 $ f' b2 $ f' b1 $ f' b0 $ z
        where f' = flip f

    unfoldBytes f z = do
        (b0, a0) <- f z
        (b1, a1) <- f a0
        (b2, a2) <- f a1
        (b3, a3) <- f a2
        (b4, a4) <- f a3
        (b5, a5) <- f a4
        (b6, a6) <- f a5
        (b7, a7) <- f a6
        (b8, a8) <- f a7
        (b9, a9) <- f a8
        (ba, aa) <- f a9
        (bb, ab) <- f aa
        (bc, ac) <- f ab
        (bd, ad) <- f ac
        (be, ae) <- f ad
        (bf,_af) <- f ae
        return (BY b0 b1 b2 b3 b4 b5 b6 b7 b8 b9 ba bb bc bd be bf)

    build = BY

instance TestSubject InWords where
    toList (WO w0 w1 w2 w3) =
        [byte 3 w0, byte 2 w0, byte 1 w0, byte 0 w0,
         byte 3 w1, byte 2 w1, byte 1 w1, byte 0 w1,
         byte 3 w2, byte 2 w2, byte 1 w2, byte 0 w2,
         byte 3 w3, byte 2 w3, byte 1 w3, byte 0 w3]

    fromList [b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf] =
        Just $ build b0 b1 b2 b3 b4 b5 b6 b7 b8 b9 ba bb bc bd be bf
    fromList _ = Nothing

    mapBytes f (WO w0 w1 w2 w3) =
        [f $ byte 3 w0, f $ byte 2 w0, f $ byte 1 w0, f $ byte 0 w0,
         f $ byte 3 w1, f $ byte 2 w1, f $ byte 1 w1, f $ byte 0 w1,
         f $ byte 3 w2, f $ byte 2 w2, f $ byte 1 w2, f $ byte 0 w2,
         f $ byte 3 w3, f $ byte 2 w3, f $ byte 1 w3, f $ byte 0 w3]

    foldBytes f z (WO w0 w1 w2 w3) =
        f' (byte 3 w0) $ f' (byte 2 w0) $ f' (byte 1 w0) $ f' (byte 0 w0) $
        f' (byte 3 w1) $ f' (byte 2 w1) $ f' (byte 1 w1) $ f' (byte 0 w1) $
        f' (byte 3 w2) $ f' (byte 2 w2) $ f' (byte 1 w2) $ f' (byte 0 w2) $
        f' (byte 3 w3) $ f' (byte 2 w3) $ f' (byte 1 w3) $ f' (byte 0 w3) $ z
        where f' = flip f

    unfoldBytes f z = do
        (b0, a0) <- f z
        (b1, a1) <- f a0
        (b2, a2) <- f a1
        (b3, a3) <- f a2
        (b4, a4) <- f a3
        (b5, a5) <- f a4
        (b6, a6) <- f a5
        (b7, a7) <- f a6
        (b8, a8) <- f a7
        (b9, a9) <- f a8
        (ba, aa) <- f a9
        (bb, ab) <- f aa
        (bc, ac) <- f ab
        (bd, ad) <- f ac
        (be, ae) <- f ad
        (bf,_af) <- f ae
        return (build b0 b1 b2 b3 b4 b5 b6 b7 b8 b9 ba bb bc bd be bf)

    build b0 b1 b2 b3 b4 b5 b6 b7 b8 b9 ba bb bc bd be bf = WO w0 w1 w2 w3
        where w0 = word b0 b1 b2 b3
              w1 = word b4 b5 b6 b7
              w2 = word b8 b9 ba bb
              w3 = word bc bd be bf

-- |Build a Word32 from four Word8 values, presented in big-endian order
word :: Word8 -> Word8 -> Word8 -> Word8 -> Word32
word a b c d =  (fromIntegral a `shiftL` 24)
            .|. (fromIntegral b `shiftL` 16)
            .|. (fromIntegral c `shiftL`  8)
            .|. (fromIntegral d            )

-- |Extract a Word8 from a Word32. Bytes, high to low, are numbered from 3 to 0,
byte :: Int -> Word32 -> Word8
byte i w = fromIntegral (w `shiftR` (i * 8))



instance TestSubject InWords64 where
    toList (WO64 w0 w1) =
        [byte64 7 w0, byte64 6 w0, byte64 5 w0, byte64 4 w0,
         byte64 3 w0, byte64 2 w0, byte64 1 w0, byte64 0 w0,
         byte64 7 w1, byte64 6 w1, byte64 5 w1, byte64 4 w1,
         byte64 3 w1, byte64 2 w1, byte64 1 w1, byte64 0 w1]

    fromList [b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf] =
        Just $ build b0 b1 b2 b3 b4 b5 b6 b7 b8 b9 ba bb bc bd be bf
    fromList _ = Nothing

    mapBytes f (WO64 w0 w1) =
        [f $ byte64 7 w0, f $ byte64 6 w0, f $ byte64 5 w0, f $ byte64 4 w0,
         f $ byte64 3 w0, f $ byte64 2 w0, f $ byte64 1 w0, f $ byte64 0 w0,
         f $ byte64 7 w1, f $ byte64 6 w1, f $ byte64 5 w1, f $ byte64 4 w1,
         f $ byte64 3 w1, f $ byte64 2 w1, f $ byte64 1 w1, f $ byte64 0 w1]

    foldBytes f z (WO64 w0 w1) =
        f' (byte64 7 w0) $ f' (byte64 6 w0) $ f' (byte64 5 w0) $ f' (byte64 4 w0) $
        f' (byte64 3 w0) $ f' (byte64 2 w0) $ f' (byte64 1 w0) $ f' (byte64 0 w0) $
        f' (byte64 7 w1) $ f' (byte64 6 w1) $ f' (byte64 5 w1) $ f' (byte64 4 w1) $
        f' (byte64 3 w1) $ f' (byte64 2 w1) $ f' (byte64 1 w1) $ f' (byte64 0 w1) $ z
        where f' = flip f

    unfoldBytes f z = do
        (b0, a0) <- f z
        (b1, a1) <- f a0
        (b2, a2) <- f a1
        (b3, a3) <- f a2
        (b4, a4) <- f a3
        (b5, a5) <- f a4
        (b6, a6) <- f a5
        (b7, a7) <- f a6
        (b8, a8) <- f a7
        (b9, a9) <- f a8
        (ba, aa) <- f a9
        (bb, ab) <- f aa
        (bc, ac) <- f ab
        (bd, ad) <- f ac
        (be, ae) <- f ad
        (bf,_af) <- f ae
        return (build b0 b1 b2 b3 b4 b5 b6 b7 b8 b9 ba bb bc bd be bf)

    build b0 b1 b2 b3 b4 b5 b6 b7 b8 b9 ba bb bc bd be bf = WO64 w0 w1
        where w0 = word64 b0 b1 b2 b3 b4 b5 b6 b7
              w1 = word64 b8 b9 ba bb bc bd be bf

-- |Build a Word64 from eight Word8 values, presented in big-endian order
word64 :: Word8 -> Word8 -> Word8 -> Word8 -> Word8 -> Word8 -> Word8 -> Word8 -> Word64
word64 a b c d e f g h = (fromIntegral a `shiftL` 56)
                   .|. (fromIntegral b `shiftL` 48)
                   .|. (fromIntegral c `shiftL` 40)
                   .|. (fromIntegral d `shiftL` 32)
                   .|. (fromIntegral e `shiftL` 24)
                   .|. (fromIntegral f `shiftL` 16)
                   .|. (fromIntegral g `shiftL`  8)
                   .|. (fromIntegral h            )

-- |Extract a Word8 from a Word64. Bytes, high to low, are numbered from 7 to 0,
byte64 :: Int -> Word64 -> Word8
byte64 i w = fromIntegral (w `shiftR` (i * 8))


instance TestSubject InList where
    toList (LI bs)          = bs
    fromList                = Just . LI
    mapBytes f (LI bs)      = map f bs
    foldBytes f z (LI bs)   = foldl' f z bs
    unfoldBytes f z         = Just $ LI $ take 16 $ unfoldr f z

instance TestSubject InByteString where
    toList (BS bs)          = B.unpack bs
    fromList                = Just . BS . B.pack
    mapBytes f (BS bs)      = B.foldr (\b r -> f b : r) [] bs
    foldBytes f z (BS bs)   = B.foldl' f z bs
    unfoldBytes f z         = Just $ BS $ fst $ B.unfoldrN 16 f z

instance TestSubject InArray where
    toList (AR ar)          = A.elems ar
    fromList                = Just . AR . A.listArray (0,15)
    mapBytes f (AR ar)      = map f $ A.elems ar
    foldBytes f z (AR ar)   = foldl' f z $ A.elems ar
    unfoldBytes f z         = Just $ AR $ A.listArray (0,15) $ unfoldr f z

instance TestSubject InUArray where
    toList (UA ua)          = U.elems ua
    fromList                = Just . UA . U.listArray (0,15)
    mapBytes f (UA ua)      = map f $ U.elems ua
    foldBytes f z (UA ua)   = foldl' f z $ U.elems ua
    unfoldBytes f z         = Just $ UA $ U.listArray (0,15) $ unfoldr f z

instance TestSubject InVector where
    toList (VE ve)          = V.fromU ve
    fromList                = Just . VE . V.toU
    mapBytes f (VE ve)      = V.foldrU (\b r -> f b : r) [] ve
    foldBytes f z (VE ve)   = V.foldlU f z ve
    unfoldBytes f z         = Just $ VE $ V.unfoldU 16 (sm .f) z
        where sm (Just (a, b)) = V.JustS (a V.:*: b)
              sm Nothing       = V.NothingS





data TestContext d = Ctx { v1a, v1b, v2 :: d, v2b0 :: Word8 }

buildContext :: (TestSubject d) => TestContext d
buildContext = Ctx {
    v1a = build 0x10 0x11 0x12 0x13 0x14 0x15 0x16 0x17
                0x18 0x19 0x1a 0x1b 0x1c 0x1d 0x1e 0x1f,
    v1b = fromJust $ fromList [16..31],
    v2  = fromJust $ fromList [200..215],
    v2b0 = 200
    }

byteContext       :: TestContext InBytes;       byteContext       = buildContext
wordContext       :: TestContext InWords;       wordContext       = buildContext
word64Context     :: TestContext InWords64;     word64Context     = buildContext
listContext       :: TestContext InList;        listContext       = buildContext
byteStringContext :: TestContext InByteString;  byteStringContext = buildContext
arrayContext      :: TestContext InArray;       arrayContext      = buildContext
uArrayContext     :: TestContext InUArray;      uArrayContext     = buildContext
vectorContext     :: TestContext InVector;      vectorContext     = buildContext


type TestComputation a =
    forall d. (TestSubject d, Eq d, Ord d) => TestContext d -> a

bEach :: String -> TestComputation a -> Benchmark
bEach name f = bgroup name [
                    bench "bytes"       $ B f byteContext,
                    bench "words"       $ B f wordContext,
                    bench "words64"     $ B f word64Context,
                    bench "list"        $ B f listContext,
                    bench "byteString"  $ B f byteStringContext,
                    bench "array"       $ B f arrayContext,
                    bench "uArray"      $ B f uArrayContext,
                    bench "vector"      $ B f vectorContext
                    ]

main :: IO ()
main = hSetBuffering stdout NoBuffering >> defaultMain [
        bEach "equal-itself"    (\ctx -> v1a ctx == v1a ctx),
        bEach "equal-same"      (\ctx -> v1a ctx == v1b ctx),
        bEach "equal-differ"    (\ctx -> v1a ctx == v2 ctx),
        bEach "compare-same"    (\ctx -> compare (v1a ctx) (v1b ctx)),
        bEach "compare-differ"  (\ctx -> compare (v1a ctx) (v2 ctx)),
        bEach "toList-and-sum"  (sum . toList . v1a),
        bEach "map-and-sum"     (sum . mapBytes toInt . v2),
        bEach "fold-and-sum"    (foldBytes (flip $ (+) . toInt) 0 . v2),

        bEach "fromList-and-eq"
            (\ctx -> let n = v2b0 ctx in
                (fromJust . fromList) [n..n+15] == v2 ctx),
        bEach "build-and-eq"
            (\ctx -> let n = v2b0 ctx in
                build n (n+1) (n+2) (n+3) (n+4) (n+5) (n+6) (n+7)
                        (n+8) (n+9) (n+10) (n+11) (n+12) (n+13) (n+14) (n+15)
                == v2 ctx),
        bEach "unfold-and-eq"
            (\ctx -> fromJust (unfoldBytes counter (v2b0 ctx)) == v2 ctx)
        ]
    where toInt :: Word8 -> Int
          toInt = fromIntegral
          counter a = Just (a, a+1)



equal-itself/bytes                 154.48 ns
equal-itself/words                  73.11 ns
equal-itself/words64                89.19 ns
equal-itself/list                  455.64 ns
equal-itself/byteString            129.09 ns
equal-itself/array                 214.65 ns
equal-itself/uArray                134.84 ns
equal-itself/vector                575.81 ns

equal-same/bytes                   153.40 ns
equal-same/words                    74.95 ns
equal-same/words64                  84.43 ns
equal-same/list                    464.60 ns
equal-same/byteString              157.38 ns
equal-same/array                   190.70 ns
equal-same/uArray                  136.00 ns
equal-same/vector                  584.04 ns

equal-differ/bytes                  78.01 ns
equal-differ/words                  62.94 ns
equal-differ/words64                69.21 ns
equal-differ/list                   82.36 ns
equal-differ/byteString            155.94 ns
equal-differ/array                  76.39 ns
equal-differ/uArray                 83.74 ns
equal-differ/vector                545.27 ns

compare-same/bytes                 139.47 ns
compare-same/words                  76.23 ns
compare-same/words64                90.45 ns
compare-same/list                  470.13 ns
compare-same/byteString             83.46 ns
compare-same/array                 458.69 ns
compare-same/uArray                413.76 ns
compare-same/vector                538.52 ns

compare-differ/bytes                79.67 ns
compare-differ/words                70.50 ns
compare-differ/words64              84.21 ns
compare-differ/list                 83.46 ns
compare-differ/byteString           83.83 ns
compare-differ/array                93.82 ns
compare-differ/uArray               94.27 ns
compare-differ/vector              499.22 ns

toList-and-sum/bytes               556.38 ns
toList-and-sum/words               895.83 ns
toList-and-sum/words64               1.15 us
toList-and-sum/list                539.67 ns
toList-and-sum/byteString          706.27 ns
toList-and-sum/array               922.25 ns
toList-and-sum/uArray              807.74 ns
toList-and-sum/vector              871.35 ns

map-and-sum/bytes                  418.83 ns
map-and-sum/words                  865.35 ns
map-and-sum/words64                  1.21 us
map-and-sum/list                   547.88 ns
map-and-sum/byteString             554.30 ns
map-and-sum/array                  672.47 ns
map-and-sum/uArray                 590.94 ns
map-and-sum/vector                 596.50 ns

fold-and-sum/bytes                 481.02 ns
fold-and-sum/words                 639.72 ns
fold-and-sum/words64               865.86 ns
fold-and-sum/list                  304.06 ns
fold-and-sum/byteString            341.16 ns
fold-and-sum/array                 632.82 ns
fold-and-sum/uArray                522.96 ns
fold-and-sum/vector                339.46 ns

fromList-and-eq/bytes              790.68 ns
fromList-and-eq/words              666.65 ns
fromList-and-eq/words64            994.36 ns
fromList-and-eq/list               778.35 ns
fromList-and-eq/byteString         857.47 ns
fromList-and-eq/array              933.95 ns
fromList-and-eq/uArray               3.90 us
fromList-and-eq/vector               1.40 us

build-and-eq/bytes                 568.29 ns
build-and-eq/words                 510.61 ns
build-and-eq/words64               890.87 ns
build-and-eq/list                    1.03 us
build-and-eq/byteString            824.19 ns
build-and-eq/array                 960.80 ns
build-and-eq/uArray                  3.79 us
build-and-eq/vector                  1.28 us

unfold-and-eq/bytes                624.53 ns
unfold-and-eq/words                565.21 ns
unfold-and-eq/words64              875.83 ns
unfold-and-eq/list                   1.21 us
unfold-and-eq/byteString             1.35 us
unfold-and-eq/array                  1.09 us
unfold-and-eq/uArray                 3.88 us
unfold-and-eq/vector                 1.19 us
equal-itself/bytes                 149.75 ns
equal-itself/words                  76.60 ns
equal-itself/words64                63.62 ns
equal-itself/list                  516.47 ns
equal-itself/byteString            141.86 ns
equal-itself/array                 192.89 ns
equal-itself/uArray                145.88 ns
equal-itself/vector                467.59 ns

equal-same/bytes                   152.78 ns
equal-same/words                    88.18 ns
equal-same/words64                  68.02 ns
equal-same/list                    523.40 ns
equal-same/byteString              174.43 ns
equal-same/array                   192.97 ns
equal-same/uArray                  123.93 ns
equal-same/vector                  477.57 ns

equal-differ/bytes                  85.94 ns
equal-differ/words                  65.51 ns
equal-differ/words64                63.60 ns
equal-differ/list                   89.86 ns
equal-differ/byteString            174.97 ns
equal-differ/array                  81.49 ns
equal-differ/uArray                 81.89 ns
equal-differ/vector                467.45 ns

compare-same/bytes                 155.65 ns
compare-same/words                  83.35 ns
compare-same/words64                70.42 ns
compare-same/list                  475.55 ns
compare-same/byteString             95.35 ns
compare-same/array                 736.59 ns
compare-same/uArray                431.40 ns
compare-same/vector                513.61 ns

compare-differ/bytes                90.90 ns
compare-differ/words                79.41 ns
compare-differ/words64              93.59 ns
compare-differ/list                136.76 ns
compare-differ/byteString           94.89 ns
compare-differ/array               105.18 ns
compare-differ/uArray              103.92 ns
compare-differ/vector              610.92 ns

toList-and-sum/bytes               649.31 ns
toList-and-sum/words               959.82 ns
toList-and-sum/words64               1.02 us
toList-and-sum/list                605.01 ns
toList-and-sum/byteString          759.76 ns
toList-and-sum/array                 1.11 us
toList-and-sum/uArray              942.17 ns
toList-and-sum/vector              971.62 ns

map-and-sum/bytes                  512.01 ns
map-and-sum/words                    1.23 us
map-and-sum/words64                  1.25 us
map-and-sum/list                   693.42 ns
map-and-sum/byteString             586.06 ns
map-and-sum/array                  838.49 ns
map-and-sum/uArray                 707.18 ns
map-and-sum/vector                 777.43 ns

fold-and-sum/bytes                 647.92 ns
fold-and-sum/words                 890.31 ns
fold-and-sum/words64               910.96 ns
fold-and-sum/list                  301.64 ns
fold-and-sum/byteString            399.00 ns
fold-and-sum/array                 696.64 ns
fold-and-sum/uArray                564.33 ns
fold-and-sum/vector                375.66 ns

fromList-and-eq/bytes                1.01 us
fromList-and-eq/words              819.08 ns
fromList-and-eq/words64            822.43 ns
fromList-and-eq/list                 1.00 us
fromList-and-eq/byteString           1.01 us
fromList-and-eq/array              922.43 ns
fromList-and-eq/uArray               4.49 us
fromList-and-eq/vector               1.23 us

build-and-eq/bytes                 609.08 ns
build-and-eq/words                 532.92 ns
build-and-eq/words64               498.30 ns
build-and-eq/list                    1.16 us
build-and-eq/byteString            896.24 ns
build-and-eq/array                 866.45 ns
build-and-eq/uArray                 17.39 us
build-and-eq/vector                  1.19 us

unfold-and-eq/bytes                807.60 ns
unfold-and-eq/words                603.23 ns
unfold-and-eq/words64              645.94 ns
unfold-and-eq/list                   1.38 us
unfold-and-eq/byteString             1.46 us
unfold-and-eq/array                  1.08 us
unfold-and-eq/uArray                 4.44 us
unfold-and-eq/vector                 1.11 us
_______________________________________________
Haskell-Cafe mailing list
Haskell-Cafe@haskell.org
http://www.haskell.org/mailman/listinfo/haskell-cafe

Reply via email to