Sorting and Type Classes

Chris Dornan Fri, 17 Oct 1997 13:45:16 +0100 (BST)

While experimenting with the sorting algorithms that have been posted here
recently I discovered that the benchmarks were being quite seriously distorted
by the use of type classes to implement some of them.  Even the use of `Ord a'
contexts instead of passing the compare method explicitly was introducing some
minor distortions.  Here are the results with type classes eliminated, except
for `pairingSort0', the original `pairingSort' as implemented with the
`PriorityQueue' type class, and "nmsort'", a variant of `nmsort' using `Ord a'
contexts.

               <      >      >=     <2     ><2    <100   >100   rand
jonSort      | 0.78 | 0.78 | 0.86 | 1.11 | 1.11 | 1.75 | 1.70 | 4.63 |
pairingSort0 | 0.81 | 1.08 | 1.18 | 1.11 | 1.09 | 2.02 | 2.22 | 4.36 |
pairingSort  | 0.52 | 0.74 | 0.84 | 0.76 | 0.76 | 1.46 | 1.60 | 2.98 |
splaySort    | 0.62 | 0.63 | 0.93 | 0.98 | 1.01 | 2.01 | 1.96 | 5.00 |
nmsort       | 0.26 | 1.15 | 1.11 | 1.48 | 1.35 | 1.07 | 1.79 | 2.28 |
nmsort'      | 0.25 | 1.32 | 1.23 | 1.72 | 1.58 | 1.30 | 2.10 | 2.83 |

(I have pruned some of the data sets that were duplicating results.)  The
Haskell programs and shell script used to collect the data is attached to the
mail message.  The programs were compiled with ghc-2.07 and run on a Sun Ultra
Sparc running Solaris 2.5.  The minimum time over 10 runs was taken.
(Incidentally, I would have rather that all the bench marking were done under
Haskell rather than the shell script, that way one could factor out the start
up cost and possibly the overhead of generating the test data; a simple potable
interface to the system clock would have been useful here.)

Anyway, once these factors have been taken into account, the natural merge sort
is still looking fairly good but the pairing heaps sort is much closer.

Chris Dornan                    [EMAIL PROTECTED]
University College Cork         +353 21 903165


-- The Haskell Sort Benchmark  Program ----------------------------------------


import List(group)
import System(getArgs)



main    =  do args <- getArgs
              let sort  =  sorter!!read (args!!0)
              let input =  generators!!read (args!!1)
              let n     =  read (args!!2)
              case sum (sort(input n)) of
                0 -> return ()
                _ -> return ()




test :: Int -> Bool
test n  =  and [ nmsort x == sort x
               | sort <- sorter,
                 input <- generators,
                 let x = input n ]


sorter  :: (Ord a) => [[a] -> [a]]
sorter  =       [ pairingSort           -- 0
                , pairingSort0          -- 1
                , jonSort               -- 2
                , splaySort             -- 3
                , nmsort                -- 4
                , nmsort'               -- 5
              ]

generators :: [Int -> [Int]]
generators =  [ strictlyIncreasing,     -- 0 <
                strictlyDecreasing,     -- 1 >
                decreasing,             -- 2 >=
                repIncreasing2,         -- 3 <2
                oscillating2,           -- 4 ><2
                repIncreasing100,       -- 5 <100
                repDecreasing100,       -- 6 >100
                random ]                -- 7 rand

strictlyIncreasing n = [1..n]
strictlyDecreasing n = [n,n-1..1]
decreasing         n = take n (interleave [n,n-1..1] [n,n-1..1])
repIncreasing2     n = take n (cycle [0,1])
repIncreasing100   n = take n (cycle [0..100])
repDecreasing100   n = take n (cycle [100,99..1])
oscillating2       n = take n (cycle [0,1,1,0])
random             n = take n (random2Ints (2*n) (3*n))



interleave                      :: [a] -> [a] -> [a]
interleave [] y         =  y
interleave (a:x) y              =  a : interleave y x


-- The random-number generator which is stolen from the `hbc'-library.

random2Ints             :: Int -> Int -> [Int]
random2Ints s1 s2
   | s1 < 1 || s1 > 2147483562  =  error "random2Ints: Bad first seed."
   | s2 < 1 || s2 > 2147483398  =  error "random2Ints: Bad second seed."
   | otherwise                  =  rands s1 s2

rands                           :: Int -> Int -> [Int]
rands s1 s2
     | z < 1                    =  z + 2147483562 : rands s1'' s2''
     | otherwise                =  z              : rands s1'' s2''
     where k                    =  s1 `div` 53668
           s1'                  =  40014 * (s1 - k * 53668) - k * 12211
           s1'' | s1' < 0       =  s1' + 2147483563
                | otherwise     =  s1'
           k'                   =  s2 `div` 52774
           s2'                  =  40692 * (s2 - k' * 52774) - k' * 3791
           s2'' | s2' < 0       =  s2' + 2147483399
                | otherwise    =  s2'
           z                    =  s1'' - s2''




-- pairingSort from Ralf Hinze <[EMAIL PROTECTED]>:


data SeqView t a  =  Null
                  |  Cons a (t a)

data Tree a = Nil
            | Node a [Tree a]


pairingSort :: Ord a => [a] -> [a]
pairingSort = pairingSortBy compare

pairingSortBy ::  (a->a->Ordering) -> [a] -> [a]
pairingSortBy cmp = toOrderedListPS cmp . fromListPS cmp

fromListPS :: (a->a->Ordering) -> [a] -> Tree a
fromListPS cmp =  meldAllPS cmp . map singlePS

singlePS a =  Node a []

meldPS cmp Nil u            =  u
meldPS cmp t@(Node _ _) Nil =  t
meldPS cmp t@(Node a ts) u@(Node b us) =
        case cmp a b of
          GT -> Node b (t:us)
          _  -> Node a (u:ts)

meldAllPS cmp []       =  Nil
meldAllPS cmp [t]      =  t
meldAllPS cmp (t:u:ts) =  meldPS cmp (meldPS cmp t u) (meldAllPS cmp ts)

toOrderedListPS cmp q = 
        case splitMinPS cmp q of
          Null     -> []
          Cons a q -> a : toOrderedListPS cmp q

splitMinPS cmp Nil         =  Null
splitMinPS cmp (Node a ts) =  Cons a (meldAllPS cmp ts)




-- pairingSort from Ralf Hinze <[EMAIL PROTECTED]>
-- (with type classes):


class PriorityQueue q where
     empty                      :: (Ord a) => q a
     single                     :: (Ord a) => a -> q a
     insert'                    :: (Ord a) => a -> q a -> q a
     meld                       :: (Ord a) => q a -> q a -> q a
     splitMin                   :: (Ord a) => q a -> SeqView q a
     fromList                   :: (Ord a) => [a] -> q a
     toOrderedList              :: (Ord a) => q a -> [a]

     single a                   =  insert' a empty
     insert' a q                =  single a `meld` q
     fromList                   =  foldm meld empty . map single
     toOrderedList q            =  case splitMin q of
         Null                   -> []
         Cons a q               -> a : toOrderedList q


pairingSort0                    :: (Ord a) => [a] -> [a]
pairingSort0                    =  toOrderedList
                                .  (fromList :: (Ord a) => [a] -> Tree a)


foldm                           :: (a -> a -> a) -> a -> [a] -> a
foldm (*) e []          =  e
foldm (*) e x                   =  fst (rec (length x) x)
     where rec 1 (a:x)          =  (a, x)
           rec n x              =  (a * b, z)
               where m          =  n `div` 2
                     (a, y)     =  rec (n - m) x 
                     (b, z)     =  rec m       y

instance PriorityQueue Tree where
     empty                      =  Nil
     single a                   =  Node a []
     meld Nil u         =  u
     meld t@(Node _ _) Nil      =  t
     meld t@(Node a ts) u@(Node b us)
         | a <= b               =  Node a (u:ts)
         | otherwise            =  Node b (t:us)
     splitMin Nil               =  Null
     splitMin (Node a ts)       =  Cons a (meldAll0 ts)
     fromList                   =  meldAll0 . map single

meldAll0                        :: (Ord a) => [Tree a] -> Tree a
meldAll0 []                     =  Nil
meldAll0 [t]                    =  t
meldAll0 (t:u:ts)               =  meld (meld t u) (meldAll0 ts)




-- `jonSort' from Jon Fairbairn <[EMAIL PROTECTED]>
-- (based on the imperative Heap Sort):

data Heap a = Nil0 | Node0 a [Heap a]

heapify x = Node0 x []

jonSort :: Ord a => [a] -> [a]
jonSort = jonSortBy compare
 
jonSortBy :: (a->a->Ordering) -> [a] -> [a]
jonSortBy cmp = flatten_heap . merge_heaps . map heapify
       where
         merge_heaps = foldb merge_heap Nil0
  
         flatten_heap Nil0 = []
         flatten_heap (Node0 x heaps) = x:flatten_heap (merge_heaps heaps)
  
         merge_heap Nil0                     _    = Nil0
         merge_heap heap@(Node0 _ _)         Nil0 = heap
         merge_heap node_a@(Node0 a heaps_a) node_b@(Node0 b heaps_b) =
                        case cmp a b of
                          LT -> Node0 a (node_b: heaps_a)
                          _  -> Node0 b (node_a: heaps_b)




-- `splaySort' by Chris Okasaki <[EMAIL PROTECTED]>:

data Splay a = SEmpty | SNode (Splay a) a (Splay a)

splaySort :: (Ord a) => [a] -> [a]
splaySort = splaySortBy compare

splaySortBy :: (a->a->Ordering) -> [a] -> [a]
splaySortBy cmp = toOrderedListSplay . fromListSplay cmp

toOrderedListSplay t = tol t []
     where tol SEmpty rest = rest
           tol (SNode a x b) rest = tol a (x : tol b rest)

fromListSplay cmp xs = foldr (insertSplay cmp) SEmpty xs
  
insertSplay cmp k t = SNode a k b
     where 
       (a, b) = partition t  -- elements of a <= k, elements of b > k
 
       partition SEmpty = (SEmpty,SEmpty)
       partition t@(SNode tl x tr) =
                case cmp x k of
                  LT ->
                     case tr of
                       SEmpty -> (t,SEmpty)
                       SNode trl y trr -> case cmp y k of
                         LT -> 
                             let tl' = SNode tl x trl
                                 (lt,ge) = partition trr
                             in (SNode tl' y lt,ge)
                         _  ->
                             let (lt,ge) = partition trl
                             in (SNode tl x lt,SNode ge y trr)
                  _  ->
                     case tl of
                       SEmpty -> (SEmpty,t)
                       SNode tll y tlr -> case cmp y k of
                         LT ->
                             let (lt,ge) = partition tlr
                             in (SNode tll y lt,SNode ge x tr)
                         _  ->
                             let tr' = SNode tlr x tr
                                 (lt,ge) = partition tll
                             in (lt,SNode ge y tr')




-- A natural merge sort by Bob Buckley <[EMAIL PROTECTED]>
-- with contributions from Chris Dornan <[EMAIL PROTECTED]>,
-- Lennart Augustsson <[EMAIL PROTECTED]> and
-- Jon Fairbairn <[EMAIL PROTECTED]>:

nmsort :: Ord a => [a] -> [a]
nmsort = nmsortBy compare
 
nmsortBy :: (a->a->Ordering) -> [a] -> [a]
nmsortBy cmp xs = foldb (merge cmp) [] (runs cmp xs)

runs :: (a->a->Ordering) -> [a] -> [[a]]
runs cmp [] = []
runs cmp (x:xs) = runs' x [] xs
      where
        runs' x xs [] = [reverse (x:xs)]
        runs' x xs (y:ys) =
                case cmp x y of
                  GT -> reverse (x:xs) : runs' y [] ys
                  _  -> runs' y (x:xs) ys

merge:: (a->a->Ordering) -> [a] -> [a] -> [a]
merge cmp [] l = l
merge cmp l@(_:_) [] = l
merge cmp l1@(h1:t1) l2@(h2:t2) =
       case cmp h1 h2 of
         GT -> h2:merge cmp l1 t2
         _  -> h1:merge cmp t1 l2
 
foldb :: (a->a->a) -> a -> [a] -> a
foldb f zero []  = zero
foldb f zero [x] = x
foldb f zero xs  = foldb f zero (fold xs)
     where
       fold (x1:x2:xs) = f x1 x2 : fold xs
       fold xs         = xs




-- The above `nmsort', but using `Ord a => ' contexts instead of explicitly
-- passing the `compare' functions.

nmsort' :: Ord a => [a] -> [a]
nmsort' xs = foldb merge' [] (runs' xs)

runs' :: Ord a => [a] -> [[a]]
runs' [] = []
runs' (x:xs) = runs' x [] xs
      where
        runs' x xs [] = [reverse (x:xs)]
        runs' x xs (y:ys) =
                case compare x y of
                  GT -> reverse (x:xs) : runs' y [] ys
                  _  -> runs' y (x:xs) ys

merge':: Ord a => [a] -> [a] -> [a]
merge' [] l = l
merge' l@(_:_) [] = l
merge' l1@(h1:t1) l2@(h2:t2) =
       case compare h1 h2 of
         GT -> h2:merge' l1 t2
         _  -> h1:merge' t1 l2



-- The Benchmarking Shell Script ----------------------------------------------

#!/bin/sh

# 3: pairSort; 8: msort'

if [ $# != 1 ]; then
        echo usage: $0 n
        exit 1
fi

tmp=/tmp/out$$

sort ()
{
  time -p sorted $1 $2 50000 +RTS -K16M -H64M >/dev/null 2>$tmp || \
                { cat $tmp; rm $tmp; echo; echo "$0: terminated"; exit 1; }
  result=`sed -n -e 's/real  *\([0-9.]*\)/\1/p' $tmp`
}

echo "$1: |\c"
for i in 0 1 2 3 4 5 6 7; do
  min=1000.0
  for x in 1 2 3 4 5 6 7 8 9 10; do
      sort $1 $i
      lt=`echo "if ($result<$min) 1; if ($result>=$min) 0"|bc`
      if [ $lt = 1 ]; then
         min=$result
      fi
  done
  echo " $result |\c"
done
echo
exit 0
Sorting and Type Classes

Reply via email to