I've written little framework to work on. See sortbench.hs and
sortbench.pyattachments.
Furthermore, I checked Yhc's implementation of sort: it is indeed very fast:

[EMAIL PROTECTED] sorting]$ python sortbench.py
Benchmark type: OnSorted
[1 of 1] Compiling Main             ( sortbench.hs, sortbench.o )
Linking sortbenchOnSorted.bin ...
1/10
(...)
10/10
Total time: 171.392577887
Scaled vs best.:
('yhcSort', 1.0)
('sort', 4.1826933506099904)
('treeSort', 4.2466878529708207)
Benchmark type: OnRevsorted
[1 of 1] Compiling Main             ( sortbench.hs, sortbench.o )
Linking sortbenchOnRevsorted.bin ...
1/10
(...)
10/10
Total time: 187.789487839
Scaled vs best.:
('yhcSort', 1.0)
('treeSort', 1.2973727012306746)
('sort', 1.3028663057478311)
Benchmark type: OnRandom
[1 of 1] Compiling Main             ( sortbench.hs, sortbench.o )
Linking sortbenchOnRandom.bin ...
1/10
(...)
10/10
Total time: 289.231264114
Scaled vs best.:
('yhcSort', 1.0)
('treeSort', 1.1167200854190948)
('sort', 1.2050043053111394)


The above results are for 1000000 Ints x 10 runs, but I don't expect any
drastic changes in longer run. I leave the interpretation up to you.
I must also admit there are not quickCheck properties in the code. Maybe
someone will want to write some.



Christopher Skrzętnicki


On Mon, Mar 10, 2008 at 9:36 AM, Neil Mitchell <[EMAIL PROTECTED]> wrote:

> Hi
>
> Can whoever picks this up please try the sort code from Yhc in their
> comparisons. In my benchmarks it ran up to twice as fast as the GHC
> code. http://darcs.haskell.org/yhc/src/packages/yhc-base-1.0/Data/List.hs
>
> I think what we really need is first quickCheck and timing framework
> for measuring sorts. After we have decided what makes one sort
> faster/better than another, then is the time to start deciding what
> sort is the best one. Ian did some initial work on this:
>
> http://www.haskell.org/pipermail/glasgow-haskell-users/2002-May/003376.html
>
> Until the sort-check package is uploaded to hackage I think most
> people will find it hard to be convinced of one sort over another.
>
>
>

Attachment: sorting.tar.gz
Description: GNU Zip compressed data

#! /usr/bin/python

import os, time

CLEAN_CMD = 'rm -f *.o *.hi *.bin'
BUILD_CMD = 'ghc -main-is main%s --make sortbench.hs -o %s'
BINARY_NAME = 'sortbench%s.bin'
TYPES = ['OnSorted','OnRevsorted','OnRandom']

def clean():
    os.system(CLEAN_CMD)

def build( benchType ):
    binName = BINARY_NAME % (benchType,)
    os.system(BUILD_CMD % (benchType, binName))

def test( benchType ):
    print 'Benchmark type:', benchType
    
    clean()
    build(benchType)
    binName = BINARY_NAME % (benchType,)
    DATA_LENGTH = 1000000 # beware, 3000000 gives ~700Mb memory consumption for Int
    TEST_CASES = 10
    dateNow = "_".join(map(str,time.localtime()[:-3]))[2:] # ugly, but works
    output_log = 'sortbench%s_%s.txt' % (benchType,dateNow)

    start = time.time()
    for i in range(TEST_CASES):
        print "%d/%d" % (i+1,TEST_CASES)
        os.system('./%s %d 1>/dev/null 2>>%s' % (binName, DATA_LENGTH, output_log) )
    stop = time.time()

    print 'Total time:',stop-start

    dc = {}
    for line in open(output_log):
        (t,al) = eval(line)
        dc.setdefault( al, 0 );
        dc[al] += t

    print "Scaled vs best.:"

    g = list(dc.iteritems())
    g.sort(key = lambda (x,y) : y )
    for el in g:
        print tuple([el[0],float(el[1]) / g[0][1]])

def main():
    for t in TYPES:
        test(t)

if __name__ == '__main__':
    main()
{-# OPTIONS_GHC -O2 -fbang-patterns #-}
module Main where

import System.CPUTime
import System.IO
import System.Environment
import System.Random
import Data.List( partition, sort, unfoldr )

import Control.Parallel.Strategies
import Control.Arrow

import qualified Data.Map as Map




-- functions to benchmark

treeSort = concatMap (reverse . snd) . Map.toAscList
           . Map.fromListWith (++) . map (\x -> (x,[x]))


yhcSort :: (Ord a) => [a] -> [a]
yhcSort = sortByYhc compare

sortByYhc cmp = mergeAll . sequences
  where
    sequences (a:b:xs)
      | a `cmp` b == GT = descending b [a]  xs
      | otherwise       = ascending  b [a] xs
    sequences xs = [xs]

    descending a as (b:bs)
      | a `cmp` b == GT = descending b (a:as) bs
    descending a as bs  = (a:as): sequences bs

    ascending a as (b:bs)
      | a `cmp` b /= GT = ascending b (a:as) bs
    ascending a as bs   = rev as [a] : sequences bs

    rev (x:xs) ys = rev xs (x:ys)
    rev [] ys = ys

    mergeAll [x] = x
    mergeAll xs  = mergeAll (mergePairs xs)

    mergePairs (a:b:xs) = merge a b: mergePairs xs
    mergePairs xs       = xs

    merge as@(a:as') bs@(b:bs')
      | a `cmp` b == GT = b:merge as  bs'
      | otherwise       = a:merge as' bs
    merge [] bs         = bs
    merge as []         = as



-- begin benchmark making code

makeBenchs benchs xs = do
 let (funcNames, funcs) = unzip benchs
 tBegin <- getCPUTime
 timers <- mapM (\f-> print (f xs) >> getCPUTime) funcs
 let times = zipWith (-) timers (tBegin:timers)
     sortedResults = sort $ zip times funcNames
     minT = fromIntegral $ fst $ head sortedResults
     scaled = map (((/minT) . fromIntegral) *** id) sortedResults
 hPutStr stderr $ unlines $ map show scaled --sortedResults

onRandom eltCnt = do
 gen <- getStdGen
 let xs = take eltCnt (randomRs (1::Int, bigNum) gen) `using` rnf
 xs `seq` return xs

onSorted eltCnt = do
 gen <- getStdGen
 let xs = take eltCnt (randomRs (1::Int, bigNum) gen) `using` rnf
     sxs = sort xs `using` rnf
 xs `seq` sxs `seq` return sxs

onRevsorted eltCnt = do
 gen <- getStdGen
 let xs = take eltCnt (randomRs (1::Int, bigNum) gen) `using` rnf
     sxs = reverse (sort xs `using` rnf)
 xs `seq` sxs `seq` return sxs


bigNum = 1000000 :: Int

-- end of benchmark making code


sortFunctions = [("sort",sort),("treeSort",treeSort),("yhcSort", yhcSort)]

mainOnSorted = makeBenchs sortFunctions
             =<< onSorted . read . head =<< getArgs


mainOnRevsorted = makeBenchs sortFunctions
                =<< onRevsorted . read . head =<< getArgs
                  

mainOnRandom = makeBenchs sortFunctions
             =<< onRandom . read . head =<< getArgs



main = mainOnSorted -- however, you can always use -main-is flag.

--
_______________________________________________
Haskell-Cafe mailing list
Haskell-Cafe@haskell.org
http://www.haskell.org/mailman/listinfo/haskell-cafe

Reply via email to