(Extracting these questions from my previous thread for clarity.)

Below is my simplest possible program to solve the Fasta shootout
benchmark.

http://shootout.alioth.debian.org/gp4/benchmark.php?test=fasta&lang=all
http://haskell.org/haskellwiki/Shootout/Fasta

I can see one remaining flaw - the line marked 'Ugly'. What's the best
way to get rid of this line?

Any other suggestions for simplifying or improving the program would
also be interesting.

This code is about three or four times slower that the current fastest
GHC entry for the Fasta benchmark. I'll elaborate it for speed when
I've produced the best version regardless of speed.

Richard.


{-# OPTIONS -O -fexcess-precision #-}
-- The Computer Language Shootout : Fasta
-- http://shootout.alioth.debian.org/
-- Simple solution by Richard Kelsall.
-- http://www.millstream.com/

import System

main = do
    n <- getArgs >>= readIO . head

    title "ONE" "Homo sapiens alu"
    writeLined (cycle alu) (n * 2)

    title "TWO" "IUB ambiguity codes"
    let (r1, r2) = splitAt (fromIntegral (n * 3)) (rand 42)  -- Ugly !!
    writeLined (map (look iubs) r1) (n * 3)

    title "THREE" "Homo sapiens frequency"
    writeLined (map (look homs) r2) (n * 5)

title :: String -> String -> IO ()
title a b = putStrLn $ ">" ++ a ++ " " ++ b

look :: [(Char, Float)] -> Float -> Char
look [(c, _)] _ = c
look ((c, f) : cfs) r = if r < f
                           then c
                           else look cfs (r - f)

lineWidth = 60

writeLined :: [Char] -> Integer -> IO ()
writeLined cs 0 = return ()
writeLined cs n = do
                let w = min n lineWidth
                    (cs1, cs2) = splitAt (fromInteger w) cs
                putStrLn cs1
                writeLined cs2 (n - w)

rand :: Int -> [Float]
rand seed = newran : (rand newseed)
    where
        im = 139968
        ia = 3877
        ic = 29573
        newseed = (seed * ia + ic) `rem` im
        newran = fromIntegral newseed / fromIntegral im

alu = "GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGA\
      \TCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACT\
      \AAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAG\
      \GCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCG\
      \CCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA"

iubs = [('a', 0.27), ('c', 0.12), ('g', 0.12), ('t', 0.27), ('B', 0.02),
        ('D', 0.02), ('H', 0.02), ('K', 0.02), ('M', 0.02), ('N', 0.02),
        ('R', 0.02), ('S', 0.02), ('V', 0.02), ('W', 0.02), ('Y', 0.02)]

homs = [('a', 0.3029549426680), ('c', 0.1979883004921),
        ('g', 0.1975473066391), ('t', 0.3015094502008)]

_______________________________________________
Haskell-Cafe mailing list
Haskell-Cafe@haskell.org
http://www.haskell.org/mailman/listinfo/haskell-cafe

Reply via email to