This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "snap-core".
The branch, master has been updated
via 405d54d744fa6823a5fca6b0acb38aa212ec1901 (commit)
from 79bdd2b4dd9b9d140b145c3db6f54f9c0c4c8bbe (commit)
Summary of changes:
src/Snap/Internal/Iteratee/KnuthMorrisPratt.hs | 108 ++++++++++++++----------
test/snap-core-testsuite.cabal | 1 +
test/suite/Snap/Iteratee/Tests.hs | 64 ++++++++++++++
3 files changed, 130 insertions(+), 43 deletions(-)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
commit 405d54d744fa6823a5fca6b0acb38aa212ec1901
Author: Gregory Collins <[email protected]>
Date: Sat Feb 5 19:44:24 2011 -0500
Fix broken KMP implementation (oops)
diff --git a/src/Snap/Internal/Iteratee/KnuthMorrisPratt.hs
b/src/Snap/Internal/Iteratee/KnuthMorrisPratt.hs
index 10e0dc1..1161f12 100644
--- a/src/Snap/Internal/Iteratee/KnuthMorrisPratt.hs
+++ b/src/Snap/Internal/Iteratee/KnuthMorrisPratt.hs
@@ -10,6 +10,7 @@ module Snap.Internal.Iteratee.KnuthMorrisPratt
import Control.Monad.Trans
import qualified Data.ByteString.Char8 as S
import Data.ByteString.Char8 (ByteString)
+import Data.ByteString.Unsafe as S
import Data.Enumerator hiding (head)
import qualified Data.Enumerator.List as EL
import qualified Data.Vector as V
@@ -17,14 +18,14 @@ import Data.Vector (Vector)
import qualified Data.Vector.Mutable as MV
import Prelude hiding (head)
-
------------------------------------------------------------------------------
data MatchInfo = Match !ByteString
| NoMatch !ByteString
deriving (Show)
------------------------------------------------------------------------------
-kmpEnumeratee :: (Monad m) =>
+-- FIXME: s/MonadIO/Monad/
+kmpEnumeratee :: (MonadIO m) =>
ByteString -- ^ needle
-> Enumeratee ByteString MatchInfo m a
kmpEnumeratee needle = checkDone (iter "" 0)
@@ -33,7 +34,7 @@ kmpEnumeratee needle = checkDone (iter "" 0)
table = buildKmpTable needle
--------------------------------------------------------------------------
- iter :: (Monad m) =>
+ iter :: (MonadIO m) =>
ByteString
-- ^ num bytes left over from previous match
-> Int -- ^ needle index
@@ -45,7 +46,7 @@ kmpEnumeratee needle = checkDone (iter "" 0)
(processChunk leftOver needleIndex k)
--------------------------------------------------------------------------
- finish :: (Monad m) =>
+ finish :: (MonadIO m) =>
ByteString
-> (Stream MatchInfo -> Iteratee MatchInfo m a)
-> Iteratee ByteString m (Step MatchInfo m a)
@@ -56,34 +57,36 @@ kmpEnumeratee needle = checkDone (iter "" 0)
checkDone (\k' -> lift $ runIteratee $ k' EOF) step
--------------------------------------------------------------------------
- processChunk :: (Monad m) =>
+ processChunk :: (MonadIO m) =>
ByteString
-> Int
-> (Stream MatchInfo -> Iteratee MatchInfo m a)
-> ByteString
-> Iteratee ByteString m (Step MatchInfo m a)
- processChunk !leftOver !needleIndex !k !input =
- go 0 needleIndex
+ processChunk !leftOver !needleIndex !k !input = go 0 needleIndex
+
where
- leftOverLen = S.length leftOver
- ilen = S.length input
+ !inputLen = S.length input
+ !leftOverLen = S.length leftOver
+ !totalLen = inputLen + leftOverLen
- ----------------------------------------------------------------------
+ -- m = start of match in leftOver + index
+ -- i = needle index
go !m !i
- | (mi >= ilen) = finalize m i
- | (S.index needle i == S.index input ii) =
- if i == needleLen-1
- then yieldMatch m
- else go m (i+1)
+ | (m+i >= totalLen) = finalize m i
+ | (S.unsafeIndex needle i == S.unsafeIndex input ii) =
+ if i == needleLen - 1
+ then yieldMatch m
+ else go m (i+1)
| otherwise = go m' i'
where
+ ii = i + m - leftOverLen
ti = V.unsafeIndex table i
- mi = m+i+leftOverLen
- ii = m+i-leftOverLen
m' = m + i - ti
i' = max 0 ti
+
----------------------------------------------------------------------
-- here we've reached the end of the input chunk. A couple of things
-- we know:
@@ -96,36 +99,55 @@ kmpEnumeratee needle = checkDone (iter "" 0)
-- * the input from [m..ilen) is a partial match that we need to feed
-- forward
finalize m i
- | m == 0 = iter (S.append leftOver input) i k
- | otherwise = if (S.null notmatching)
- then iter rest i k
- else do
- step <- lift $ runIteratee $ k chunk
- checkDone (iter rest i) step
- where
- (nomatch,rest) = S.splitAt m input
- notmatching = S.append leftOver nomatch
- chunk = Chunks [NoMatch notmatching]
+ | m == 0 = iter (S.append leftOver input) i k
+
+ | m < leftOverLen = do
+ -- here part of the leftover is the no match and we carry the
+ -- rest forward along with the input
+ let (nomatch, restLeftOver) = S.splitAt m leftOver
+ let rest = S.append restLeftOver input
+ let chunk = Chunks [NoMatch nomatch]
+ step <- lift $ runIteratee $ k chunk
+ checkDone (iter rest i) step
+
+ | otherwise = do
+ -- the whole leftOver part was garbage.
+ let m' = m - leftOverLen
+ let (nomatchInput, rest) = S.splitAt m' input
+ let nomatch = S.append leftOver nomatchInput
+ let chunk = Chunks [NoMatch nomatch]
+ step <- lift $ runIteratee $ k chunk
+ checkDone (iter rest i) step
+
----------------------------------------------------------------------
-- we got a match! We need to yield [0..m) to the inner iteratee as a
-- nomatch, then yield the needle, then go back to processing the rest
-- of the input from scratch. Same caveats re: m==0 apply here.
- yieldMatch m
- | m == 0 = do
- step <- lift $ runIteratee $ k $ Chunks [Match needle]
- checkDone (\k' -> processChunk "" 0 k' rest0) step
- | otherwise = do
- step <- lift $ runIteratee $ k $ Chunks [NoMatch notmatching]
- flip checkDone step $ \k' -> do
- step' <- lift $ runIteratee $ k' $ Chunks [Match needle]
- flip checkDone step' $ \k'' -> processChunk "" 0 k'' rest
+ yieldMatch m
+ | m == 0 = do
+ -- we have no garbage and just advance by the size of the
needle
+ step <- lift $ runIteratee $ k $ Chunks [Match needle]
+ -- we also can be sure here that the needle crosses the
+ -- leftOver..input boundary (otherwise we would have yielded
+ -- it earlier)
+ let m' = needleLen - leftOverLen
+ let rest = S.drop m' input
+ checkDone (\k' -> processChunk "" 0 k' rest) step
- where
- nomatch = S.take m input
- notmatching = S.append leftOver nomatch
- rest0 = S.drop (m+needleLen-leftOverLen) input
- rest = S.drop (m+needleLen) input
+ | otherwise = do
+ let (garbage,rest) =
+ if m < leftOverLen
+ then let (a,b) = S.splitAt m leftOver
+ in (a, S.drop needleLen $ S.append b input)
+ else let m' = m - leftOverLen
+ (a,b) = S.splitAt m' input
+ in (S.append leftOver a, S.drop needleLen b)
+
+ step <- lift $ runIteratee $ k $ Chunks [NoMatch garbage]
+ flip checkDone step $ \k' -> do
+ step' <- lift $ runIteratee $ k' $ Chunks [Match needle]
+ flip checkDone step' $ \k'' -> processChunk "" 0 k'' rest
------------------------------------------------------------------------------
@@ -144,8 +166,8 @@ buildKmpTable needle = V.create $ do
if pos >= needleLen
then return t
else do
- let wPos1 = S.index needle (pos-1)
- let wCnd = S.index needle cnd
+ let wPos1 = S.unsafeIndex needle (pos-1)
+ let wCnd = S.unsafeIndex needle cnd
if wPos1 == wCnd
then do
diff --git a/test/snap-core-testsuite.cabal b/test/snap-core-testsuite.cabal
index 9c96685..79c7528 100644
--- a/test/snap-core-testsuite.cabal
+++ b/test/snap-core-testsuite.cabal
@@ -24,6 +24,7 @@ Executable testsuite
attoparsec >= 0.8.1 && < 0.9,
attoparsec-enumerator >= 0.2.0.3,
base >= 4 && < 5,
+ base16-bytestring == 0.1.*,
blaze-builder >= 0.2.1.4 && <0.3,
bytestring,
bytestring-nums,
diff --git a/test/suite/Snap/Iteratee/Tests.hs
b/test/suite/Snap/Iteratee/Tests.hs
index e4fca4f..d693bb1 100644
--- a/test/suite/Snap/Iteratee/Tests.hs
+++ b/test/suite/Snap/Iteratee/Tests.hs
@@ -12,6 +12,7 @@ import Control.Exception hiding (try, assert)
import Control.Monad
import Control.Monad.Identity
import Control.Monad.Trans
+import qualified Data.ByteString.Base16 as B16
import Data.ByteString.Char8 (ByteString)
import qualified Data.ByteString.Char8 as S
import qualified Data.ByteString.Lazy.Char8 as L
@@ -22,12 +23,14 @@ import System.Timeout
import Test.Framework
import Test.Framework.Providers.QuickCheck2
import Test.QuickCheck
+import Test.QuickCheck.Gen
import qualified Test.QuickCheck.Monadic as QC
import Test.QuickCheck.Monadic hiding (run)
import Test.Framework.Providers.HUnit
import qualified Test.HUnit as H
import Snap.Iteratee
+import Snap.Internal.Iteratee.KnuthMorrisPratt
import Snap.Test.Common ()
import Snap.Internal.Iteratee.Debug
@@ -74,6 +77,7 @@ tests = [ testEnumBS
, testCountBytes2
, testKillIfTooSlow1
, testKillIfTooSlow2
+ , testKMP
]
testEnumBS :: Test
@@ -400,6 +404,7 @@ testCountBytes = testProperty "iteratee/countBytes" $
n = L.length s
+------------------------------------------------------------------------------
testCountBytes2 :: Test
testCountBytes2 = testProperty "iteratee/countBytes2" $
monadicIO $ forAllM arbitrary prop
@@ -423,6 +428,65 @@ testCountBytes2 = testProperty "iteratee/countBytes2" $
------------------------------------------------------------------------------
+testKMP :: Test
+testKMP = testProperty "iteratee/KnuthMorrisPratt" $
+ monadicIO $ forAllM arbitrary prop
+ where
+ prop :: (ByteString, [ByteString]) -> PropertyM IO ()
+ prop (needle', haystack') = do
+ let needle = B16.encode needle'
+ let haystack = Prelude.map B16.encode haystack'
+
+ let lneedle = L.fromChunks [needle]
+ let lhaystack = L.fromChunks haystack
+
+ pre ((not $ S.null needle) &&
+ (not $ L.null lhaystack) &&
+ (not $ S.isInfixOf needle (S.concat haystack)))
+
+ -- put the needle at the beginning, at the end, and somewhere in the
+ -- middle
+
+ lhay <- insertNeedle lneedle lhaystack
+ let stream = L.concat [lneedle, lhay]
+
+ -- there should be exactly three Matches
+ let iter = enumLBS stream $$ joinI (kmpEnumeratee needle $$ consume)
+ outp <- QC.run $ run_ iter
+
+ let nMatches = length $ filter isMatch outp
+
+ when (nMatches /= 3) $ QC.run $ do
+ putStrLn "got wrong number of matches!!"
+ putStrLn "needle:\n"
+ putStrLn $ show lneedle
+ putStrLn "\nhaystack:\n"
+ mapM_ (putStrLn . show) (L.toChunks stream)
+ putStrLn "\noutput stream:"
+ mapM_ (putStrLn . show) outp
+ putStrLn ""
+
+ assert $ nMatches == 3
+
+
+ isMatch (Match _) = True
+ isMatch _ = False
+
+ insertNeedle lneedle lhaystack = do
+ idxL <- pick $ choose (0, lenL-1)
+ idxN <- pick $ choose (0, lenN-1)
+ idxN2 <- pick $ choose (0, lenN-1)
+ let (l1, l2) = L.splitAt (toEnum idxL) lhaystack
+ let (n1, n2) = L.splitAt (toEnum idxN) lneedle
+ let (n3, n4) = L.splitAt (toEnum idxN2) lneedle
+
+ return $ L.concat [ l1, n1, n2, l2, n3, n4 ]
+
+ where
+ lenN = fromEnum $ L.length lneedle
+ lenL = fromEnum $ L.length lhaystack
+
+------------------------------------------------------------------------------
testKillIfTooSlow1 :: Test
testKillIfTooSlow1 = testCase "iteratee/killIfTooSlow1" $ do
let iter = killIfTooSlow (return ()) 1000 4 consume
-----------------------------------------------------------------------
hooks/post-receive
--
snap-core
_______________________________________________
Snap mailing list
[email protected]
http://mailman-mail5.webfaction.com/listinfo/snap