Re: [Haskell-cafe] my take at knucleotide

2013-03-27 Thread Branimir Maksimovic
I have posted this version.Mad home grown HashMap and replaced IOref with 
Ptr.This made program twice as fast as current entry.
{-# Language BangPatterns #-} The Computer Language Benchmarks Game-- 
http://benchmarksgame.alioth.debian.org/ Contributed by Branimir 
Maksimovic--import Data.Bitsimport Data.Charimport Data.Intimport 
Data.Listimport Data.Array.Baseimport Data.Array.Unboxedimport 
Data.Array.IOimport qualified Data.ByteString.Char8 as Simport 
Foreign.Ptrimport Foreign.Storableimport Foreign.Marshal.Allocimport 
Control.Concurrentimport Text.Printf
main = dolet skip = dol - S.getLineif S.isPrefixOf 
(S.pack THREE) lthen return ()else skip
skips - S.getContentslet content = S.filter ((/=) '\n') s;mapM_ 
(execute content) actionsdata Actions = I Int | S Stringactions = [I 1,I 2, 
  S GGT,S GGTA,S GGTATT,S GGTAAATT,S 
GGTAAATTTATAGT]execute content (I i) = writeFrequencies content iexecute 
content (S s) = writeCount content s
writeFrequencies :: S.ByteString - Int - IO ()writeFrequencies input size = 
doht - tcalculate input sizelst - Main.foldM (\lst (k,v)-do 
v' - peek vreturn $ (k,v'):lst) [] htlet sorted = sortBy (\(_,x) 
(_,y) - y `compare` x) lstsum = fromIntegral ((S.length input) + 1 - 
size)mapM_ (\(k,v)- doprintf %s %.3f\n (toString k) 
((100 * (fromIntegral v)/sum)::Double)) sortedputChar '\n'
writeCount :: S.ByteString - String - IO ()writeCount input string = do
let size = length stringk = T (toNum (S.pack string) 0 size) sizeht 
- tcalculate input sizeres - Main.lookup ht kcase res of 
Nothing - putStrLn $ string ++  not found...Just v - do
r - peek vprintf %d\t%s\n r string
tcalculate :: S.ByteString - Int - IO HMtcalculate input size = dolet 
l = [0..63]actions = map (\i - (calculate input i size (length 
l))) lvars - mapM (\action - dovar - newEmptyMVar
forkIO $ doanswer - action 
   putMVar var answerreturn var) actionsresult 
- newTable :: IO HMresults - mapM takeMVar varsmapM_ (\ht - 
Main.foldM (\lst (k,v) - do res - Main.lookup lst 
kcase res ofNothing 
- dor1 - peek v   
 r2 - mallocpoke r2 r1 
   Main.insert lst k r2Just 
v1 - dor1 - peek v1   
 r2 - peek vpoke v1 (r1+r2)
return lst) result ht) resultsreturn result
calculate :: S.ByteString - Int - Int - Int - IO HMcalculate input beg 
size incr = do!ht - newTable :: IO HMletcalculate' i  
| i = ((S.length input)+1 - size) = return ht | otherwise = do 
   let k =  T (toNum input i size) sizeres - Main.lookup ht k  
  case res ofNothing - do!r - malloc  
  poke r 1Main.insert ht k r
Just v - do !r - peek vpoke v (r+1)   
 calculate' (i+incr)calculate' beg
toNum :: S.ByteString - Int - Int - Int64toNum s beg size = toNum' 0 size
wheretoNum' v 0 = vtoNum' v i = toNum' ((v `shiftL` 2) .|.  
   (toNumA `unsafeAt` (ord (S.index s (beg+i-1) (i-1)
toString :: T - StringtoString (T v s) = toString' v swhere
toString' v 0 = []toString' v i = case v..3 of
0 - 'A'1 - 'C'2 - 'T'
3 - 'G'  : toString' (v `shiftR` 2) (i-1)
toNumA :: UArray Int Int64toNumA = array (0,255) [(ord 'a',0),(ord 'c',1),(ord 
't',2),(ord 'g',3),(ord 'A',0),(ord 'C',1),(ord 'T',2),(ord 'G',3)]
data T = T !Int64 !Intinstance Eq T where(T a _) == (T b _) = a == bclass 
Hash h wherehash :: h - Int64instance Hash T wherehash (T a _) = a
type HM = HashMap T (Ptr Int)data HashMap k v = HashMap !(IOArray Int64 
[(k,v)])tsz = 4096newTable :: IO (HashMap k v)newTable = do!array - 
newArray (0,(tsz-1)) []return $ HashMap array
lookup :: (Eq k, Hash k)=HashMap k v - k - IO (Maybe v)lookup (HashMap a) k 
= dolet h = hash k!lst - readArray a (h .. (tsz-1))let
loop [] = return Nothingloop ((!k',!v):xs) | k /= k' = loop 
xs| otherwise = return $ Just vloop lst
insert :: (Eq k, Hash k)=HashMap k v - k - v - IO ()insert (HashMap a) k v 
= dolet h = hash k!lst - 

Re: [Haskell-cafe] my take at knucleotide

2013-03-26 Thread Branimir Maksimovic
Finally, I have made it ;)Trick was in more threads . For some reason if I run 
64 (sweet spot) threads program runsfaster both with -threaded and without 
;)Other trick is that I don't convert to uppercase (shaves second) rather pack 
nucleotidein 64 bit int.Program runs 30% faster multithreaded (scales better) 
than current entry, and consumes 50% less memory,and is shorter.If someone can 
see some improvement please post, otherwise I will contribute this program.

{-# Language BangPatterns #-} The Computer Language Benchmarks Game-- 
http://benchmarksgame.alioth.debian.org/ Contributed by Branimir 
Maksimovic--import Data.Bitsimport Data.Charimport Data.Intimport 
Data.Listimport Data.IORefimport Data.Array.Unboxedimport Data.Array.Baseimport 
qualified Data.HashTable.IO as Himport Data.Hashableimport qualified 
Data.ByteString.Char8 as Simport Control.Concurrentimport Text.Printf
main = dos - S.getContentslet(_,subs) = S.breakSubstring 
(S.pack THREE) scontent = (S.filter ((/=) '\n') .
S.dropWhile ((/=) '\n')) subs mapM_ (execute content) actions
data Actions = I Int | S Stringactions = [I 1,I 2,  
 S GGT,S GGTA,S GGTATT,S GGTAAATT,S GGTAAATTTATAGT]execute 
content (I i) = writeFrequencies content iexecute content (S s) = writeCount 
content s
writeFrequencies :: S.ByteString - Int - IO ()writeFrequencies input size = 
doht - tcalculate input sizelst - H.foldM (\lst (k,v)-do v' 
- readIORef vreturn $ insertBy (\(_,x) (_,y)-y `compare` x) (k,v') 
lst) [] htlet sum = fromIntegral ((S.length input) + 1 - size)mapM_ 
(\(k,v)- doprintf %s %.3f\n (toString k) ((100 * 
(fromIntegral v)/sum)::Double)) lstputChar '\n'
writeCount :: S.ByteString - String - IO ()writeCount input string = do
let size = length stringht - tcalculate input sizelet k = T (toNum 
(S.pack string) 0 size) sizeres - H.lookup ht kcase res of 
Nothing - putStrLn $ string ++  not found...Just v - do
r - readIORef vprintf %d\t%s\n r string
tcalculate :: S.ByteString - Int - IO HMtcalculate input size = dolet 
l = [0..63]actions = map (\i - (calculate input i size (length 
l))) lvars - mapM (\action - dovar - newEmptyMVar
forkIO $ doanswer - action 
   putMVar var answerreturn var) actionsresult 
- newTable :: IO HMresults - mapM takeMVar varsmapM_ (\ht - H.foldM 
(\lst (k,v) - do res - H.lookup lst k 
   case res ofNothing - do 
   r1 - readIORef v
r2 - newIORef r1H.insert lst k r2  
  Just v1 - dor1 - 
readIORef v1r2 - readIORef v   
 writeIORef v1 (r1+r2)return 
lst) result ht) resultsreturn result
calculate :: S.ByteString - Int - Int - Int - IO HMcalculate input beg 
size incr = do!ht - newTable :: IO HMletcalculate' i  
| i = ((S.length input)+1 - size) = return ht | otherwise = do 
   res - H.lookup ht kcase res ofNothing - do 
   !r - newIORef 1H.insert ht k r  
  Just v - do!r - readIORef v
writeIORef v (r+1)calculate' (i+incr)where k = T 
(toNum input i size) sizecalculate' beg
toNum :: S.ByteString - Int - Int - Int64toNum s beg size = toNum' 0 size
wheretoNum' v 0 = vtoNum' v i = toNum' ((v `shiftL` 2) .|.  
   (toNumA `unsafeAt` (ord (S.index s (beg+i-1) (i-1)
toString :: T - StringtoString (T v s) = toString' v swhere
toString' v 0 = []toString' v i = case v..3 of
0 - 'A'1 - 'C'2 - 'T'
3 - 'G'  : toString' (v `shiftR` 2) (i-1)
toNumA :: UArray Int Int64toNumA = array (0,255) [(ord 'a',0),(ord 'c',1),(ord 
't',2),(ord 'g',3),(ord 'A',0),(ord 'C',1),(ord 'T',2),(ord 'G',3)]
data T = T !Int64 !Intinstance Eq T where(T a _) == (T b _) = a == 
binstance Hashable T wherehashWithSalt _ (T a _) = fromIntegral a
type HM = H.BasicHashTable T (IORef Int)newTable = H.new

Date: Sun, 24 Mar 2013 20:12:57 +0100
Subject: Re: [Haskell-cafe] my take at knucleotide
From: g...@gregorycollins.net
To: bm...@hotmail.com
CC: haskell-cafe@haskell.org

What happens to performance if you compile and link with cabal install 
--constraint='hashable  1.2' ?
G

[Haskell-cafe] my take at knucleotide

2013-03-24 Thread Branimir Maksimovic
Hi, I have tried to implement knucleotide benchmark program this 
time:http://benchmarksgame.alioth.debian.org/u64q/performance.php?test=knucleotide
Implementation is shorter (uses hashtable from hashtables package),but two time 
slower then current Haskell entry ( which is too low levelfor my taste :)).What 
is interesting is that if I try to place Int64 as a key tohash table, 
performance is even slower.Strange that dropping and taking from bytestring 
would befaster than packing string in 64 bit int and directly indexing.
If someone can see something that can bring performance on parwith current 
haskell entry , I would post it , otherwise no point,except that program is 
shorter and not low level.
{-# Language BangPatterns #-} The Computer Language Benchmarks Game-- 
http://benchmarksgame.alioth.debian.org/ Contributed by Branimir 
Maksimovic--import Data.Charimport Data.Listimport Data.IORefimport qualified 
Data.HashTable.IO as Himport qualified Data.ByteString.Char8 as Simport 
Control.Concurrentimport Text.Printf
main = dos - S.getContentslet content = (S.map toUpper . S.concat . 
tail .  dropWhile (\l-not $ S.isPrefixOf (S.pack THREE) l) 
.  S.lines) smapM_ (execute content) actions
data Actions = I Int | S Stringactions = [I 1,I 2,   S GGT,S GGTA,S 
GGTATT,S GGTAAATT,S GGTAAATTTATAGT]execute content (I i) = 
writeFrequencies content iexecute content (S s) = writeCount content s
writeFrequencies input size = doht - tcalculate input sizelst - 
H.foldM (\lst (k,v)-do v' - readIORef vreturn $ insertBy 
(\(_,x) (_,y)-y `compare` x) (k,v') lst) [] htlet sum = fromIntegral 
((S.length input) + 1 - size)mapM_ (\(k,v)- doprintf %s %.3f\n   
  (S.unpack k) ((100 * (fromIntegral v)/sum)::Double)) lstputChar 
'\n'
writeCount input string = dolet size = length stringht - tcalculate 
input sizeres - H.lookup ht (S.pack string)case res of Nothing 
- putStrLn $ string ++  not found...Just v - dor - 
readIORef vprintf %d\t%s\n r (string::String)
tcalculate input size = dolet l = [0..7]actions = map (\i 
- (calculate input i size (length l))) lvars - mapM (\action - do
var - newEmptyMVarforkIO $ do  
  answer - actionputMVar var answer
return var) actionsresult - newTableresults - mapM takeMVar vars  
  mapM_ (\ht - H.foldM (\lst (k,v) - do res - 
H.lookup lst kcase res of   
 Nothing - dor1 - readIORef v 
   r2 - newIORef r1
H.insert lst k r2Just v1 - do  
  r1 - readIORef v1r2 - 
readIORef vwriteIORef v1 (r1+r2)
return lst) result ht) resultsreturn resultcalculate 
input beg size incr = doht - newTableletcalculate' :: 
S.ByteString - Int - IO HashTablecalculate' str i  | i = 
((S.length input)+1 - size) = return ht | otherwise = dores 
- H.lookup ht kcase res ofNothing - do
!r - newIORef 1H.insert ht k rJust 
v - do!r - readIORef vwriteIORef v 
(r+1)calculate' (S.drop incr str) (i+incr)where k = 
S.take size strcalculate' (S.drop beg input) beg
type HashTable = H.BasicHashTable S.ByteString (IORef Int) newTable :: IO 
HashTablenewTable = H.new
  ___
Haskell-Cafe mailing list
Haskell-Cafe@haskell.org
http://www.haskell.org/mailman/listinfo/haskell-cafe


Re: [Haskell-cafe] my take at knucleotide

2013-03-24 Thread Gregory Collins
What happens to performance if you compile and link with cabal install
--constraint='hashable  1.2' ?

G


On Sun, Mar 24, 2013 at 4:08 PM, Branimir Maksimovic bm...@hotmail.comwrote:

 Hi, I have tried to implement knucleotide benchmark program this time:

 http://benchmarksgame.alioth.debian.org/u64q/performance.php?test=knucleotide

 Implementation is shorter (uses hashtable from hashtables package),
 but two time slower then current Haskell entry ( which is too low level
 for my taste :)).
 What is interesting is that if I try to place Int64 as a key to
 hash table, performance is even slower.
 Strange that dropping and taking from bytestring would be
 faster than packing string in 64 bit int and directly indexing.

 If someone can see something that can bring performance on par
 with current haskell entry , I would post it , otherwise no point,
 except that program is shorter and not low level.

 {-# Language BangPatterns #-}
 --
 -- The Computer Language Benchmarks Game
 -- http://benchmarksgame.alioth.debian.org/
 --
 -- Contributed by Branimir Maksimovic
 --
 import Data.Char
 import Data.List
 import Data.IORef
 import qualified Data.HashTable.IO as H
 import qualified Data.ByteString.Char8 as S
 import Control.Concurrent
 import Text.Printf

 main = do
 s - S.getContents
 let content = (S.map toUpper . S.concat . tail .
   dropWhile (\l-not $ S.isPrefixOf (S.pack THREE) l) .
   S.lines) s
 mapM_ (execute content) actions

 data Actions = I Int | S String
 actions = [I 1,I 2,
S GGT,S GGTA,S GGTATT,S GGTAAATT,S
 GGTAAATTTATAGT]
 execute content (I i) = writeFrequencies content i
 execute content (S s) = writeCount content s

 writeFrequencies input size = do
 ht - tcalculate input size
 lst - H.foldM (\lst (k,v)-do
 v' - readIORef v
 return $ insertBy (\(_,x) (_,y)-y `compare` x) (k,v') lst) [] ht
 let sum = fromIntegral ((S.length input) + 1 - size)
 mapM_ (\(k,v)- do
 printf %s %.3f\n
 (S.unpack k) ((100 * (fromIntegral v)/sum)::Double)) lst
 putChar '\n'

 writeCount input string = do
 let size = length string
 ht - tcalculate input size
 res - H.lookup ht (S.pack string)
 case res of
 Nothing - putStrLn $ string ++  not found...
 Just v - do
 r - readIORef v
 printf %d\t%s\n r (string::String)

 tcalculate input size = do
 let
 l = [0..7]
 actions = map (\i - (calculate input i size (length l))) l
 vars - mapM (\action - do
 var - newEmptyMVar
 forkIO $ do
 answer - action
 putMVar var answer
 return var) actions
 result - newTable
 results - mapM takeMVar vars
 mapM_ (\ht - H.foldM (\lst (k,v) - do
 res - H.lookup lst k
 case res of
 Nothing - do
 r1 - readIORef v
 r2 - newIORef r1
 H.insert lst k r2
 Just v1 - do
 r1 - readIORef v1
 r2 - readIORef v
 writeIORef v1 (r1+r2)
 return lst) result ht) results
 return result

 calculate input beg size incr = do
 ht - newTable
 let
 calculate' :: S.ByteString - Int - IO HashTable
 calculate' str i
  | i = ((S.length input)+1 - size) = return ht
  | otherwise = do
 res - H.lookup ht k
 case res of
 Nothing - do
 !r - newIORef 1
 H.insert ht k r
 Just v - do
 !r - readIORef v
 writeIORef v (r+1)
 calculate' (S.drop incr str) (i+incr)
 where k = S.take size str
 calculate' (S.drop beg input) beg

 type HashTable = H.BasicHashTable S.ByteString (IORef Int)
 newTable :: IO HashTable
 newTable = H.new


 ___
 Haskell-Cafe mailing list
 Haskell-Cafe@haskell.org
 http://www.haskell.org/mailman/listinfo/haskell-cafe




-- 
Gregory Collins g...@gregorycollins.net
___
Haskell-Cafe mailing list
Haskell-Cafe@haskell.org
http://www.haskell.org/mailman/listinfo/haskell-cafe


Re: [Haskell-cafe] my take at knucleotide

2013-03-24 Thread Branimir Maksimovic
I have placed constraint on version of hashable, time is exactly 
same.bmaxa@maxa:~/shootout/knucleotide$ cabal list hashable* hashable
Synopsis: A class for types that can be converted to a hash valueDefault 
available version: 1.2.0.5Installed versions: 1.1.2.5Homepage: 
http://github.com/tibbe/hashableLicense:  BSD3

Date: Sun, 24 Mar 2013 20:12:57 +0100
Subject: Re: [Haskell-cafe] my take at knucleotide
From: g...@gregorycollins.net
To: bm...@hotmail.com
CC: haskell-cafe@haskell.org

What happens to performance if you compile and link with cabal install 
--constraint='hashable  1.2' ?
G


  ___
Haskell-Cafe mailing list
Haskell-Cafe@haskell.org
http://www.haskell.org/mailman/listinfo/haskell-cafe