While experimenting with the sorting algorithms that have been posted here
recently I discovered that the benchmarks were being quite seriously distorted
by the use of type classes to implement some of them. Even the use of `Ord a'
contexts instead of passing the compare method explicitly was introducing some
minor distortions. Here are the results with type classes eliminated, except
for `pairingSort0', the original `pairingSort' as implemented with the
`PriorityQueue' type class, and "nmsort'", a variant of `nmsort' using `Ord a'
contexts.
< > >= <2 ><2 <100 >100 rand
jonSort | 0.78 | 0.78 | 0.86 | 1.11 | 1.11 | 1.75 | 1.70 | 4.63 |
pairingSort0 | 0.81 | 1.08 | 1.18 | 1.11 | 1.09 | 2.02 | 2.22 | 4.36 |
pairingSort | 0.52 | 0.74 | 0.84 | 0.76 | 0.76 | 1.46 | 1.60 | 2.98 |
splaySort | 0.62 | 0.63 | 0.93 | 0.98 | 1.01 | 2.01 | 1.96 | 5.00 |
nmsort | 0.26 | 1.15 | 1.11 | 1.48 | 1.35 | 1.07 | 1.79 | 2.28 |
nmsort' | 0.25 | 1.32 | 1.23 | 1.72 | 1.58 | 1.30 | 2.10 | 2.83 |
(I have pruned some of the data sets that were duplicating results.) The
Haskell programs and shell script used to collect the data is attached to the
mail message. The programs were compiled with ghc-2.07 and run on a Sun Ultra
Sparc running Solaris 2.5. The minimum time over 10 runs was taken.
(Incidentally, I would have rather that all the bench marking were done under
Haskell rather than the shell script, that way one could factor out the start
up cost and possibly the overhead of generating the test data; a simple potable
interface to the system clock would have been useful here.)
Anyway, once these factors have been taken into account, the natural merge sort
is still looking fairly good but the pairing heaps sort is much closer.
Chris Dornan [EMAIL PROTECTED]
University College Cork +353 21 903165
-- The Haskell Sort Benchmark Program ----------------------------------------
import List(group)
import System(getArgs)
main = do args <- getArgs
let sort = sorter!!read (args!!0)
let input = generators!!read (args!!1)
let n = read (args!!2)
case sum (sort(input n)) of
0 -> return ()
_ -> return ()
test :: Int -> Bool
test n = and [ nmsort x == sort x
| sort <- sorter,
input <- generators,
let x = input n ]
sorter :: (Ord a) => [[a] -> [a]]
sorter = [ pairingSort -- 0
, pairingSort0 -- 1
, jonSort -- 2
, splaySort -- 3
, nmsort -- 4
, nmsort' -- 5
]
generators :: [Int -> [Int]]
generators = [ strictlyIncreasing, -- 0 <
strictlyDecreasing, -- 1 >
decreasing, -- 2 >=
repIncreasing2, -- 3 <2
oscillating2, -- 4 ><2
repIncreasing100, -- 5 <100
repDecreasing100, -- 6 >100
random ] -- 7 rand
strictlyIncreasing n = [1..n]
strictlyDecreasing n = [n,n-1..1]
decreasing n = take n (interleave [n,n-1..1] [n,n-1..1])
repIncreasing2 n = take n (cycle [0,1])
repIncreasing100 n = take n (cycle [0..100])
repDecreasing100 n = take n (cycle [100,99..1])
oscillating2 n = take n (cycle [0,1,1,0])
random n = take n (random2Ints (2*n) (3*n))
interleave :: [a] -> [a] -> [a]
interleave [] y = y
interleave (a:x) y = a : interleave y x
-- The random-number generator which is stolen from the `hbc'-library.
random2Ints :: Int -> Int -> [Int]
random2Ints s1 s2
| s1 < 1 || s1 > 2147483562 = error "random2Ints: Bad first seed."
| s2 < 1 || s2 > 2147483398 = error "random2Ints: Bad second seed."
| otherwise = rands s1 s2
rands :: Int -> Int -> [Int]
rands s1 s2
| z < 1 = z + 2147483562 : rands s1'' s2''
| otherwise = z : rands s1'' s2''
where k = s1 `div` 53668
s1' = 40014 * (s1 - k * 53668) - k * 12211
s1'' | s1' < 0 = s1' + 2147483563
| otherwise = s1'
k' = s2 `div` 52774
s2' = 40692 * (s2 - k' * 52774) - k' * 3791
s2'' | s2' < 0 = s2' + 2147483399
| otherwise = s2'
z = s1'' - s2''
-- pairingSort from Ralf Hinze <[EMAIL PROTECTED]>:
data SeqView t a = Null
| Cons a (t a)
data Tree a = Nil
| Node a [Tree a]
pairingSort :: Ord a => [a] -> [a]
pairingSort = pairingSortBy compare
pairingSortBy :: (a->a->Ordering) -> [a] -> [a]
pairingSortBy cmp = toOrderedListPS cmp . fromListPS cmp
fromListPS :: (a->a->Ordering) -> [a] -> Tree a
fromListPS cmp = meldAllPS cmp . map singlePS
singlePS a = Node a []
meldPS cmp Nil u = u
meldPS cmp t@(Node _ _) Nil = t
meldPS cmp t@(Node a ts) u@(Node b us) =
case cmp a b of
GT -> Node b (t:us)
_ -> Node a (u:ts)
meldAllPS cmp [] = Nil
meldAllPS cmp [t] = t
meldAllPS cmp (t:u:ts) = meldPS cmp (meldPS cmp t u) (meldAllPS cmp ts)
toOrderedListPS cmp q =
case splitMinPS cmp q of
Null -> []
Cons a q -> a : toOrderedListPS cmp q
splitMinPS cmp Nil = Null
splitMinPS cmp (Node a ts) = Cons a (meldAllPS cmp ts)
-- pairingSort from Ralf Hinze <[EMAIL PROTECTED]>
-- (with type classes):
class PriorityQueue q where
empty :: (Ord a) => q a
single :: (Ord a) => a -> q a
insert' :: (Ord a) => a -> q a -> q a
meld :: (Ord a) => q a -> q a -> q a
splitMin :: (Ord a) => q a -> SeqView q a
fromList :: (Ord a) => [a] -> q a
toOrderedList :: (Ord a) => q a -> [a]
single a = insert' a empty
insert' a q = single a `meld` q
fromList = foldm meld empty . map single
toOrderedList q = case splitMin q of
Null -> []
Cons a q -> a : toOrderedList q
pairingSort0 :: (Ord a) => [a] -> [a]
pairingSort0 = toOrderedList
. (fromList :: (Ord a) => [a] -> Tree a)
foldm :: (a -> a -> a) -> a -> [a] -> a
foldm (*) e [] = e
foldm (*) e x = fst (rec (length x) x)
where rec 1 (a:x) = (a, x)
rec n x = (a * b, z)
where m = n `div` 2
(a, y) = rec (n - m) x
(b, z) = rec m y
instance PriorityQueue Tree where
empty = Nil
single a = Node a []
meld Nil u = u
meld t@(Node _ _) Nil = t
meld t@(Node a ts) u@(Node b us)
| a <= b = Node a (u:ts)
| otherwise = Node b (t:us)
splitMin Nil = Null
splitMin (Node a ts) = Cons a (meldAll0 ts)
fromList = meldAll0 . map single
meldAll0 :: (Ord a) => [Tree a] -> Tree a
meldAll0 [] = Nil
meldAll0 [t] = t
meldAll0 (t:u:ts) = meld (meld t u) (meldAll0 ts)
-- `jonSort' from Jon Fairbairn <[EMAIL PROTECTED]>
-- (based on the imperative Heap Sort):
data Heap a = Nil0 | Node0 a [Heap a]
heapify x = Node0 x []
jonSort :: Ord a => [a] -> [a]
jonSort = jonSortBy compare
jonSortBy :: (a->a->Ordering) -> [a] -> [a]
jonSortBy cmp = flatten_heap . merge_heaps . map heapify
where
merge_heaps = foldb merge_heap Nil0
flatten_heap Nil0 = []
flatten_heap (Node0 x heaps) = x:flatten_heap (merge_heaps heaps)
merge_heap Nil0 _ = Nil0
merge_heap heap@(Node0 _ _) Nil0 = heap
merge_heap node_a@(Node0 a heaps_a) node_b@(Node0 b heaps_b) =
case cmp a b of
LT -> Node0 a (node_b: heaps_a)
_ -> Node0 b (node_a: heaps_b)
-- `splaySort' by Chris Okasaki <[EMAIL PROTECTED]>:
data Splay a = SEmpty | SNode (Splay a) a (Splay a)
splaySort :: (Ord a) => [a] -> [a]
splaySort = splaySortBy compare
splaySortBy :: (a->a->Ordering) -> [a] -> [a]
splaySortBy cmp = toOrderedListSplay . fromListSplay cmp
toOrderedListSplay t = tol t []
where tol SEmpty rest = rest
tol (SNode a x b) rest = tol a (x : tol b rest)
fromListSplay cmp xs = foldr (insertSplay cmp) SEmpty xs
insertSplay cmp k t = SNode a k b
where
(a, b) = partition t -- elements of a <= k, elements of b > k
partition SEmpty = (SEmpty,SEmpty)
partition t@(SNode tl x tr) =
case cmp x k of
LT ->
case tr of
SEmpty -> (t,SEmpty)
SNode trl y trr -> case cmp y k of
LT ->
let tl' = SNode tl x trl
(lt,ge) = partition trr
in (SNode tl' y lt,ge)
_ ->
let (lt,ge) = partition trl
in (SNode tl x lt,SNode ge y trr)
_ ->
case tl of
SEmpty -> (SEmpty,t)
SNode tll y tlr -> case cmp y k of
LT ->
let (lt,ge) = partition tlr
in (SNode tll y lt,SNode ge x tr)
_ ->
let tr' = SNode tlr x tr
(lt,ge) = partition tll
in (lt,SNode ge y tr')
-- A natural merge sort by Bob Buckley <[EMAIL PROTECTED]>
-- with contributions from Chris Dornan <[EMAIL PROTECTED]>,
-- Lennart Augustsson <[EMAIL PROTECTED]> and
-- Jon Fairbairn <[EMAIL PROTECTED]>:
nmsort :: Ord a => [a] -> [a]
nmsort = nmsortBy compare
nmsortBy :: (a->a->Ordering) -> [a] -> [a]
nmsortBy cmp xs = foldb (merge cmp) [] (runs cmp xs)
runs :: (a->a->Ordering) -> [a] -> [[a]]
runs cmp [] = []
runs cmp (x:xs) = runs' x [] xs
where
runs' x xs [] = [reverse (x:xs)]
runs' x xs (y:ys) =
case cmp x y of
GT -> reverse (x:xs) : runs' y [] ys
_ -> runs' y (x:xs) ys
merge:: (a->a->Ordering) -> [a] -> [a] -> [a]
merge cmp [] l = l
merge cmp l@(_:_) [] = l
merge cmp l1@(h1:t1) l2@(h2:t2) =
case cmp h1 h2 of
GT -> h2:merge cmp l1 t2
_ -> h1:merge cmp t1 l2
foldb :: (a->a->a) -> a -> [a] -> a
foldb f zero [] = zero
foldb f zero [x] = x
foldb f zero xs = foldb f zero (fold xs)
where
fold (x1:x2:xs) = f x1 x2 : fold xs
fold xs = xs
-- The above `nmsort', but using `Ord a => ' contexts instead of explicitly
-- passing the `compare' functions.
nmsort' :: Ord a => [a] -> [a]
nmsort' xs = foldb merge' [] (runs' xs)
runs' :: Ord a => [a] -> [[a]]
runs' [] = []
runs' (x:xs) = runs' x [] xs
where
runs' x xs [] = [reverse (x:xs)]
runs' x xs (y:ys) =
case compare x y of
GT -> reverse (x:xs) : runs' y [] ys
_ -> runs' y (x:xs) ys
merge':: Ord a => [a] -> [a] -> [a]
merge' [] l = l
merge' l@(_:_) [] = l
merge' l1@(h1:t1) l2@(h2:t2) =
case compare h1 h2 of
GT -> h2:merge' l1 t2
_ -> h1:merge' t1 l2
-- The Benchmarking Shell Script ----------------------------------------------
#!/bin/sh
# 3: pairSort; 8: msort'
if [ $# != 1 ]; then
echo usage: $0 n
exit 1
fi
tmp=/tmp/out$$
sort ()
{
time -p sorted $1 $2 50000 +RTS -K16M -H64M >/dev/null 2>$tmp || \
{ cat $tmp; rm $tmp; echo; echo "$0: terminated"; exit 1; }
result=`sed -n -e 's/real *\([0-9.]*\)/\1/p' $tmp`
}
echo "$1: |\c"
for i in 0 1 2 3 4 5 6 7; do
min=1000.0
for x in 1 2 3 4 5 6 7 8 9 10; do
sort $1 $i
lt=`echo "if ($result<$min) 1; if ($result>=$min) 0"|bc`
if [ $lt = 1 ]; then
min=$result
fi
done
echo " $result |\c"
done
echo
exit 0