Oooops, stupid error before, fixed below. Missed it due to too few and simple tests, should've quickchecked :-/ > > Cheers, > Daniel > > -- KMP algorithm for lazy ByteStrings > {-# OPTIONS_GHC -fbang-patterns #-} > module KMP (kmpMatch) where > > import qualified Data.Array.Base as Base (unsafeAt) > import Data.Array.Unboxed (UArray, listArray) > import qualified Data.Array as A (listArray, (!), elems) > > import qualified Data.ByteString.Lazy as B > import qualified Data.ByteString as S > import qualified Data.ByteString.Base as B (unsafeHead, unsafeTail, > unsafeDrop, unsafeIndex) > import Data.Int (Int64) > import Data.Word (Word8) > > kmpMatch :: B.ByteString -> B.ByteString -> Int64 > kmpMatch patLazy search > > | B.null patLazy = 0 > | otherwise = kmp 0 0 search > > where > pat :: S.ByteString > pat = S.concat (B.toChunks patLazy) > patLen = S.length pat > sym :: Int -> Word8 > sym = B.unsafeIndex pat > bord = A.listArray (0,patLen) $ > (-1):0:[getS (sym i) i + 1 | i <- [1 .. patLen - 1]] > where > getS s n > > | m < 0 || s == sym m = m > | otherwise = getS s m > > where > m = bord A.! n > borders :: UArray Int Int > borders = listArray (0,patLen) $ A.elems bord > (?) :: UArray Int Int -> Int -> Int > (?) = Base.unsafeAt > getShift :: Word8 -> Int -> Int > getShift s n = help n > where > help k > > | m < 0 || sym m == s = m > | otherwise = help m > > where > m = borders ? k > kmp :: Int64 -> Int -> B.ByteString -> Int64 > kmp !idx !match !srch > > | patLen == match = idx - fromIntegral match > | B.null srch = -1 > | sym match == B.head srch = kmp (idx+1) (match+1) (B.tail > | srch) match == 0 = kmp (idx+1) 0 (B.tail srch) > | otherwise = > case getShift (B.head srch) match of > -1 -> kmp idx 0 srch > shft -> kmp (idx+1) (shft+1) (B.tail srch)
_______________________________________________ Haskell-Cafe mailing list Haskell-Cafe@haskell.org http://www.haskell.org/mailman/listinfo/haskell-cafe