Re: [Haskell-cafe] my take at knucleotide
I have posted this version.Mad home grown HashMap and replaced IOref with Ptr.This made program twice as fast as current entry. {-# Language BangPatterns #-} The Computer Language Benchmarks Game-- http://benchmarksgame.alioth.debian.org/ Contributed by Branimir Maksimovic--import Data.Bitsimport Data.Charimport Data.Intimport Data.Listimport Data.Array.Baseimport Data.Array.Unboxedimport Data.Array.IOimport qualified Data.ByteString.Char8 as Simport Foreign.Ptrimport Foreign.Storableimport Foreign.Marshal.Allocimport Control.Concurrentimport Text.Printf main = dolet skip = dol - S.getLineif S.isPrefixOf (S.pack THREE) lthen return ()else skip skips - S.getContentslet content = S.filter ((/=) '\n') s;mapM_ (execute content) actionsdata Actions = I Int | S Stringactions = [I 1,I 2, S GGT,S GGTA,S GGTATT,S GGTAAATT,S GGTAAATTTATAGT]execute content (I i) = writeFrequencies content iexecute content (S s) = writeCount content s writeFrequencies :: S.ByteString - Int - IO ()writeFrequencies input size = doht - tcalculate input sizelst - Main.foldM (\lst (k,v)-do v' - peek vreturn $ (k,v'):lst) [] htlet sorted = sortBy (\(_,x) (_,y) - y `compare` x) lstsum = fromIntegral ((S.length input) + 1 - size)mapM_ (\(k,v)- doprintf %s %.3f\n (toString k) ((100 * (fromIntegral v)/sum)::Double)) sortedputChar '\n' writeCount :: S.ByteString - String - IO ()writeCount input string = do let size = length stringk = T (toNum (S.pack string) 0 size) sizeht - tcalculate input sizeres - Main.lookup ht kcase res of Nothing - putStrLn $ string ++ not found...Just v - do r - peek vprintf %d\t%s\n r string tcalculate :: S.ByteString - Int - IO HMtcalculate input size = dolet l = [0..63]actions = map (\i - (calculate input i size (length l))) lvars - mapM (\action - dovar - newEmptyMVar forkIO $ doanswer - action putMVar var answerreturn var) actionsresult - newTable :: IO HMresults - mapM takeMVar varsmapM_ (\ht - Main.foldM (\lst (k,v) - do res - Main.lookup lst kcase res ofNothing - dor1 - peek v r2 - mallocpoke r2 r1 Main.insert lst k r2Just v1 - dor1 - peek v1 r2 - peek vpoke v1 (r1+r2) return lst) result ht) resultsreturn result calculate :: S.ByteString - Int - Int - Int - IO HMcalculate input beg size incr = do!ht - newTable :: IO HMletcalculate' i | i = ((S.length input)+1 - size) = return ht | otherwise = do let k = T (toNum input i size) sizeres - Main.lookup ht k case res ofNothing - do!r - malloc poke r 1Main.insert ht k r Just v - do !r - peek vpoke v (r+1) calculate' (i+incr)calculate' beg toNum :: S.ByteString - Int - Int - Int64toNum s beg size = toNum' 0 size wheretoNum' v 0 = vtoNum' v i = toNum' ((v `shiftL` 2) .|. (toNumA `unsafeAt` (ord (S.index s (beg+i-1) (i-1) toString :: T - StringtoString (T v s) = toString' v swhere toString' v 0 = []toString' v i = case v..3 of 0 - 'A'1 - 'C'2 - 'T' 3 - 'G' : toString' (v `shiftR` 2) (i-1) toNumA :: UArray Int Int64toNumA = array (0,255) [(ord 'a',0),(ord 'c',1),(ord 't',2),(ord 'g',3),(ord 'A',0),(ord 'C',1),(ord 'T',2),(ord 'G',3)] data T = T !Int64 !Intinstance Eq T where(T a _) == (T b _) = a == bclass Hash h wherehash :: h - Int64instance Hash T wherehash (T a _) = a type HM = HashMap T (Ptr Int)data HashMap k v = HashMap !(IOArray Int64 [(k,v)])tsz = 4096newTable :: IO (HashMap k v)newTable = do!array - newArray (0,(tsz-1)) []return $ HashMap array lookup :: (Eq k, Hash k)=HashMap k v - k - IO (Maybe v)lookup (HashMap a) k = dolet h = hash k!lst - readArray a (h .. (tsz-1))let loop [] = return Nothingloop ((!k',!v):xs) | k /= k' = loop xs| otherwise = return $ Just vloop lst insert :: (Eq k, Hash k)=HashMap k v - k - v - IO ()insert (HashMap a) k v = dolet h = hash k!lst -
Re: [Haskell-cafe] my take at knucleotide
Finally, I have made it ;)Trick was in more threads . For some reason if I run 64 (sweet spot) threads program runsfaster both with -threaded and without ;)Other trick is that I don't convert to uppercase (shaves second) rather pack nucleotidein 64 bit int.Program runs 30% faster multithreaded (scales better) than current entry, and consumes 50% less memory,and is shorter.If someone can see some improvement please post, otherwise I will contribute this program. {-# Language BangPatterns #-} The Computer Language Benchmarks Game-- http://benchmarksgame.alioth.debian.org/ Contributed by Branimir Maksimovic--import Data.Bitsimport Data.Charimport Data.Intimport Data.Listimport Data.IORefimport Data.Array.Unboxedimport Data.Array.Baseimport qualified Data.HashTable.IO as Himport Data.Hashableimport qualified Data.ByteString.Char8 as Simport Control.Concurrentimport Text.Printf main = dos - S.getContentslet(_,subs) = S.breakSubstring (S.pack THREE) scontent = (S.filter ((/=) '\n') . S.dropWhile ((/=) '\n')) subs mapM_ (execute content) actions data Actions = I Int | S Stringactions = [I 1,I 2, S GGT,S GGTA,S GGTATT,S GGTAAATT,S GGTAAATTTATAGT]execute content (I i) = writeFrequencies content iexecute content (S s) = writeCount content s writeFrequencies :: S.ByteString - Int - IO ()writeFrequencies input size = doht - tcalculate input sizelst - H.foldM (\lst (k,v)-do v' - readIORef vreturn $ insertBy (\(_,x) (_,y)-y `compare` x) (k,v') lst) [] htlet sum = fromIntegral ((S.length input) + 1 - size)mapM_ (\(k,v)- doprintf %s %.3f\n (toString k) ((100 * (fromIntegral v)/sum)::Double)) lstputChar '\n' writeCount :: S.ByteString - String - IO ()writeCount input string = do let size = length stringht - tcalculate input sizelet k = T (toNum (S.pack string) 0 size) sizeres - H.lookup ht kcase res of Nothing - putStrLn $ string ++ not found...Just v - do r - readIORef vprintf %d\t%s\n r string tcalculate :: S.ByteString - Int - IO HMtcalculate input size = dolet l = [0..63]actions = map (\i - (calculate input i size (length l))) lvars - mapM (\action - dovar - newEmptyMVar forkIO $ doanswer - action putMVar var answerreturn var) actionsresult - newTable :: IO HMresults - mapM takeMVar varsmapM_ (\ht - H.foldM (\lst (k,v) - do res - H.lookup lst k case res ofNothing - do r1 - readIORef v r2 - newIORef r1H.insert lst k r2 Just v1 - dor1 - readIORef v1r2 - readIORef v writeIORef v1 (r1+r2)return lst) result ht) resultsreturn result calculate :: S.ByteString - Int - Int - Int - IO HMcalculate input beg size incr = do!ht - newTable :: IO HMletcalculate' i | i = ((S.length input)+1 - size) = return ht | otherwise = do res - H.lookup ht kcase res ofNothing - do !r - newIORef 1H.insert ht k r Just v - do!r - readIORef v writeIORef v (r+1)calculate' (i+incr)where k = T (toNum input i size) sizecalculate' beg toNum :: S.ByteString - Int - Int - Int64toNum s beg size = toNum' 0 size wheretoNum' v 0 = vtoNum' v i = toNum' ((v `shiftL` 2) .|. (toNumA `unsafeAt` (ord (S.index s (beg+i-1) (i-1) toString :: T - StringtoString (T v s) = toString' v swhere toString' v 0 = []toString' v i = case v..3 of 0 - 'A'1 - 'C'2 - 'T' 3 - 'G' : toString' (v `shiftR` 2) (i-1) toNumA :: UArray Int Int64toNumA = array (0,255) [(ord 'a',0),(ord 'c',1),(ord 't',2),(ord 'g',3),(ord 'A',0),(ord 'C',1),(ord 'T',2),(ord 'G',3)] data T = T !Int64 !Intinstance Eq T where(T a _) == (T b _) = a == binstance Hashable T wherehashWithSalt _ (T a _) = fromIntegral a type HM = H.BasicHashTable T (IORef Int)newTable = H.new Date: Sun, 24 Mar 2013 20:12:57 +0100 Subject: Re: [Haskell-cafe] my take at knucleotide From: g...@gregorycollins.net To: bm...@hotmail.com CC: haskell-cafe@haskell.org What happens to performance if you compile and link with cabal install --constraint='hashable 1.2' ? G
[Haskell-cafe] my take at knucleotide
Hi, I have tried to implement knucleotide benchmark program this time:http://benchmarksgame.alioth.debian.org/u64q/performance.php?test=knucleotide Implementation is shorter (uses hashtable from hashtables package),but two time slower then current Haskell entry ( which is too low levelfor my taste :)).What is interesting is that if I try to place Int64 as a key tohash table, performance is even slower.Strange that dropping and taking from bytestring would befaster than packing string in 64 bit int and directly indexing. If someone can see something that can bring performance on parwith current haskell entry , I would post it , otherwise no point,except that program is shorter and not low level. {-# Language BangPatterns #-} The Computer Language Benchmarks Game-- http://benchmarksgame.alioth.debian.org/ Contributed by Branimir Maksimovic--import Data.Charimport Data.Listimport Data.IORefimport qualified Data.HashTable.IO as Himport qualified Data.ByteString.Char8 as Simport Control.Concurrentimport Text.Printf main = dos - S.getContentslet content = (S.map toUpper . S.concat . tail . dropWhile (\l-not $ S.isPrefixOf (S.pack THREE) l) . S.lines) smapM_ (execute content) actions data Actions = I Int | S Stringactions = [I 1,I 2, S GGT,S GGTA,S GGTATT,S GGTAAATT,S GGTAAATTTATAGT]execute content (I i) = writeFrequencies content iexecute content (S s) = writeCount content s writeFrequencies input size = doht - tcalculate input sizelst - H.foldM (\lst (k,v)-do v' - readIORef vreturn $ insertBy (\(_,x) (_,y)-y `compare` x) (k,v') lst) [] htlet sum = fromIntegral ((S.length input) + 1 - size)mapM_ (\(k,v)- doprintf %s %.3f\n (S.unpack k) ((100 * (fromIntegral v)/sum)::Double)) lstputChar '\n' writeCount input string = dolet size = length stringht - tcalculate input sizeres - H.lookup ht (S.pack string)case res of Nothing - putStrLn $ string ++ not found...Just v - dor - readIORef vprintf %d\t%s\n r (string::String) tcalculate input size = dolet l = [0..7]actions = map (\i - (calculate input i size (length l))) lvars - mapM (\action - do var - newEmptyMVarforkIO $ do answer - actionputMVar var answer return var) actionsresult - newTableresults - mapM takeMVar vars mapM_ (\ht - H.foldM (\lst (k,v) - do res - H.lookup lst kcase res of Nothing - dor1 - readIORef v r2 - newIORef r1 H.insert lst k r2Just v1 - do r1 - readIORef v1r2 - readIORef vwriteIORef v1 (r1+r2) return lst) result ht) resultsreturn resultcalculate input beg size incr = doht - newTableletcalculate' :: S.ByteString - Int - IO HashTablecalculate' str i | i = ((S.length input)+1 - size) = return ht | otherwise = dores - H.lookup ht kcase res ofNothing - do !r - newIORef 1H.insert ht k rJust v - do!r - readIORef vwriteIORef v (r+1)calculate' (S.drop incr str) (i+incr)where k = S.take size strcalculate' (S.drop beg input) beg type HashTable = H.BasicHashTable S.ByteString (IORef Int) newTable :: IO HashTablenewTable = H.new ___ Haskell-Cafe mailing list Haskell-Cafe@haskell.org http://www.haskell.org/mailman/listinfo/haskell-cafe
Re: [Haskell-cafe] my take at knucleotide
What happens to performance if you compile and link with cabal install --constraint='hashable 1.2' ? G On Sun, Mar 24, 2013 at 4:08 PM, Branimir Maksimovic bm...@hotmail.comwrote: Hi, I have tried to implement knucleotide benchmark program this time: http://benchmarksgame.alioth.debian.org/u64q/performance.php?test=knucleotide Implementation is shorter (uses hashtable from hashtables package), but two time slower then current Haskell entry ( which is too low level for my taste :)). What is interesting is that if I try to place Int64 as a key to hash table, performance is even slower. Strange that dropping and taking from bytestring would be faster than packing string in 64 bit int and directly indexing. If someone can see something that can bring performance on par with current haskell entry , I would post it , otherwise no point, except that program is shorter and not low level. {-# Language BangPatterns #-} -- -- The Computer Language Benchmarks Game -- http://benchmarksgame.alioth.debian.org/ -- -- Contributed by Branimir Maksimovic -- import Data.Char import Data.List import Data.IORef import qualified Data.HashTable.IO as H import qualified Data.ByteString.Char8 as S import Control.Concurrent import Text.Printf main = do s - S.getContents let content = (S.map toUpper . S.concat . tail . dropWhile (\l-not $ S.isPrefixOf (S.pack THREE) l) . S.lines) s mapM_ (execute content) actions data Actions = I Int | S String actions = [I 1,I 2, S GGT,S GGTA,S GGTATT,S GGTAAATT,S GGTAAATTTATAGT] execute content (I i) = writeFrequencies content i execute content (S s) = writeCount content s writeFrequencies input size = do ht - tcalculate input size lst - H.foldM (\lst (k,v)-do v' - readIORef v return $ insertBy (\(_,x) (_,y)-y `compare` x) (k,v') lst) [] ht let sum = fromIntegral ((S.length input) + 1 - size) mapM_ (\(k,v)- do printf %s %.3f\n (S.unpack k) ((100 * (fromIntegral v)/sum)::Double)) lst putChar '\n' writeCount input string = do let size = length string ht - tcalculate input size res - H.lookup ht (S.pack string) case res of Nothing - putStrLn $ string ++ not found... Just v - do r - readIORef v printf %d\t%s\n r (string::String) tcalculate input size = do let l = [0..7] actions = map (\i - (calculate input i size (length l))) l vars - mapM (\action - do var - newEmptyMVar forkIO $ do answer - action putMVar var answer return var) actions result - newTable results - mapM takeMVar vars mapM_ (\ht - H.foldM (\lst (k,v) - do res - H.lookup lst k case res of Nothing - do r1 - readIORef v r2 - newIORef r1 H.insert lst k r2 Just v1 - do r1 - readIORef v1 r2 - readIORef v writeIORef v1 (r1+r2) return lst) result ht) results return result calculate input beg size incr = do ht - newTable let calculate' :: S.ByteString - Int - IO HashTable calculate' str i | i = ((S.length input)+1 - size) = return ht | otherwise = do res - H.lookup ht k case res of Nothing - do !r - newIORef 1 H.insert ht k r Just v - do !r - readIORef v writeIORef v (r+1) calculate' (S.drop incr str) (i+incr) where k = S.take size str calculate' (S.drop beg input) beg type HashTable = H.BasicHashTable S.ByteString (IORef Int) newTable :: IO HashTable newTable = H.new ___ Haskell-Cafe mailing list Haskell-Cafe@haskell.org http://www.haskell.org/mailman/listinfo/haskell-cafe -- Gregory Collins g...@gregorycollins.net ___ Haskell-Cafe mailing list Haskell-Cafe@haskell.org http://www.haskell.org/mailman/listinfo/haskell-cafe
Re: [Haskell-cafe] my take at knucleotide
I have placed constraint on version of hashable, time is exactly same.bmaxa@maxa:~/shootout/knucleotide$ cabal list hashable* hashable Synopsis: A class for types that can be converted to a hash valueDefault available version: 1.2.0.5Installed versions: 1.1.2.5Homepage: http://github.com/tibbe/hashableLicense: BSD3 Date: Sun, 24 Mar 2013 20:12:57 +0100 Subject: Re: [Haskell-cafe] my take at knucleotide From: g...@gregorycollins.net To: bm...@hotmail.com CC: haskell-cafe@haskell.org What happens to performance if you compile and link with cabal install --constraint='hashable 1.2' ? G ___ Haskell-Cafe mailing list Haskell-Cafe@haskell.org http://www.haskell.org/mailman/listinfo/haskell-cafe