This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "snap-core".

The branch, master has been updated
       via  405d54d744fa6823a5fca6b0acb38aa212ec1901 (commit)
      from  79bdd2b4dd9b9d140b145c3db6f54f9c0c4c8bbe (commit)


Summary of changes:
 src/Snap/Internal/Iteratee/KnuthMorrisPratt.hs |  108 ++++++++++++++----------
 test/snap-core-testsuite.cabal                 |    1 +
 test/suite/Snap/Iteratee/Tests.hs              |   64 ++++++++++++++
 3 files changed, 130 insertions(+), 43 deletions(-)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
commit 405d54d744fa6823a5fca6b0acb38aa212ec1901
Author: Gregory Collins <[email protected]>
Date:   Sat Feb 5 19:44:24 2011 -0500

    Fix broken KMP implementation (oops)

diff --git a/src/Snap/Internal/Iteratee/KnuthMorrisPratt.hs 
b/src/Snap/Internal/Iteratee/KnuthMorrisPratt.hs
index 10e0dc1..1161f12 100644
--- a/src/Snap/Internal/Iteratee/KnuthMorrisPratt.hs
+++ b/src/Snap/Internal/Iteratee/KnuthMorrisPratt.hs
@@ -10,6 +10,7 @@ module Snap.Internal.Iteratee.KnuthMorrisPratt
 import           Control.Monad.Trans
 import qualified Data.ByteString.Char8 as S
 import           Data.ByteString.Char8 (ByteString)
+import           Data.ByteString.Unsafe as S
 import           Data.Enumerator hiding (head)
 import qualified Data.Enumerator.List as EL
 import qualified Data.Vector           as V
@@ -17,14 +18,14 @@ import           Data.Vector           (Vector)
 import qualified Data.Vector.Mutable   as MV
 import           Prelude               hiding (head)
 
-
 ------------------------------------------------------------------------------
 data MatchInfo = Match !ByteString
                | NoMatch !ByteString
   deriving (Show)
 
 ------------------------------------------------------------------------------
-kmpEnumeratee :: (Monad m) =>
+-- FIXME: s/MonadIO/Monad/
+kmpEnumeratee :: (MonadIO m) =>
                  ByteString                         -- ^ needle
               -> Enumeratee ByteString MatchInfo m a
 kmpEnumeratee needle = checkDone (iter "" 0)
@@ -33,7 +34,7 @@ kmpEnumeratee needle = checkDone (iter "" 0)
     table     = buildKmpTable needle
 
     --------------------------------------------------------------------------
-    iter :: (Monad m) =>
+    iter :: (MonadIO m) =>
             ByteString
          -- ^ num bytes left over from previous match
          -> Int  -- ^ needle index
@@ -45,7 +46,7 @@ kmpEnumeratee needle = checkDone (iter "" 0)
                           (processChunk leftOver needleIndex k)
 
     --------------------------------------------------------------------------
-    finish :: (Monad m) =>
+    finish :: (MonadIO m) =>
               ByteString
            -> (Stream MatchInfo -> Iteratee MatchInfo m a)
            -> Iteratee ByteString m (Step MatchInfo m a)
@@ -56,34 +57,36 @@ kmpEnumeratee needle = checkDone (iter "" 0)
             checkDone (\k' -> lift $ runIteratee $ k' EOF) step
 
     --------------------------------------------------------------------------
-    processChunk :: (Monad m) =>
+    processChunk :: (MonadIO m) =>
                     ByteString
                  -> Int
                  -> (Stream MatchInfo -> Iteratee MatchInfo m a)
                  -> ByteString
                  -> Iteratee ByteString m (Step MatchInfo m a)
-    processChunk !leftOver !needleIndex !k !input =
-        go 0 needleIndex
+    processChunk !leftOver !needleIndex !k !input = go 0 needleIndex
+
       where
-        leftOverLen = S.length leftOver
-        ilen = S.length input
+        !inputLen    = S.length input
+        !leftOverLen = S.length leftOver
+        !totalLen    = inputLen + leftOverLen
 
-        ----------------------------------------------------------------------
+        -- m = start of match in leftOver + index
+        -- i = needle index
         go !m !i
-           | (mi >= ilen)                           = finalize m i
-           | (S.index needle i == S.index input ii) =
-               if i == needleLen-1
-                 then yieldMatch m
-                 else go m (i+1)
+           | (m+i >= totalLen) = finalize m i
+           | (S.unsafeIndex needle i == S.unsafeIndex input ii) =
+                 if i == needleLen - 1
+                   then yieldMatch m
+                   else go m (i+1)
            | otherwise = go m' i'
 
           where
+            ii = i + m - leftOverLen
             ti = V.unsafeIndex table i
-            mi = m+i+leftOverLen
-            ii = m+i-leftOverLen
             m' = m + i - ti
             i' = max 0 ti
 
+           
         ----------------------------------------------------------------------
         -- here we've reached the end of the input chunk. A couple of things
         -- we know:
@@ -96,36 +99,55 @@ kmpEnumeratee needle = checkDone (iter "" 0)
         -- * the input from [m..ilen) is a partial match that we need to feed
         --   forward
         finalize m i
-            | m == 0    = iter (S.append leftOver input) i k
-            | otherwise = if (S.null notmatching)
-                            then iter rest i k
-                            else do
-                              step <- lift $ runIteratee $ k chunk
-                              checkDone (iter rest i) step
-          where
-            (nomatch,rest) = S.splitAt m input
-            notmatching    = S.append leftOver nomatch
-            chunk          = Chunks [NoMatch notmatching]
+            | m == 0 = iter (S.append leftOver input) i k
+
+            | m < leftOverLen = do
+                -- here part of the leftover is the no match and we carry the
+                -- rest forward along with the input
+                let (nomatch, restLeftOver) = S.splitAt m leftOver
+                let rest = S.append restLeftOver input
+                let chunk = Chunks [NoMatch nomatch]
+                step <- lift $ runIteratee $ k chunk
+                checkDone (iter rest i) step
+
+            | otherwise = do
+                -- the whole leftOver part was garbage.
+                let m' = m - leftOverLen
+                let (nomatchInput, rest) = S.splitAt m' input
+                let nomatch = S.append leftOver nomatchInput
+                let chunk = Chunks [NoMatch nomatch]
+                step <- lift $ runIteratee $ k chunk
+                checkDone (iter rest i) step
+            
 
         ----------------------------------------------------------------------
         -- we got a match! We need to yield [0..m) to the inner iteratee as a
         -- nomatch, then yield the needle, then go back to processing the rest
         -- of the input from scratch. Same caveats re: m==0 apply here.
-        yieldMatch m
-            | m == 0    = do
-                step <- lift $ runIteratee $ k $ Chunks [Match needle]
-                checkDone (\k' -> processChunk "" 0 k' rest0) step
-            | otherwise = do
-                step <- lift $ runIteratee $ k $ Chunks [NoMatch notmatching]
-                flip checkDone step $ \k' -> do
-                    step' <- lift $ runIteratee $ k' $ Chunks [Match needle]
-                    flip checkDone step' $ \k'' -> processChunk "" 0 k'' rest
+        yieldMatch m 
+            | m == 0 = do
+                 -- we have no garbage and just advance by the size of the 
needle
+                 step <- lift $ runIteratee $ k $ Chunks [Match needle]
+                 -- we also can be sure here that the needle crosses the
+                 -- leftOver..input boundary (otherwise we would have yielded
+                 -- it earlier)
+                 let m' = needleLen - leftOverLen
+                 let rest = S.drop m' input
+                 checkDone (\k' -> processChunk "" 0 k' rest) step
 
-          where
-            nomatch     = S.take m input
-            notmatching = S.append leftOver nomatch
-            rest0       = S.drop (m+needleLen-leftOverLen) input
-            rest        = S.drop (m+needleLen) input
+            | otherwise = do
+                 let (garbage,rest) =
+                         if m < leftOverLen
+                           then let (a,b) = S.splitAt m leftOver
+                                in (a, S.drop needleLen $ S.append b input)
+                           else let m'    = m - leftOverLen
+                                    (a,b) = S.splitAt m' input
+                                in (S.append leftOver a, S.drop needleLen b)
+                 
+                 step <- lift $ runIteratee $ k $ Chunks [NoMatch garbage]
+                 flip checkDone step $ \k' -> do
+                     step' <- lift $ runIteratee $ k' $ Chunks [Match needle]
+                     flip checkDone step' $ \k'' -> processChunk "" 0 k'' rest
 
 
 ------------------------------------------------------------------------------
@@ -144,8 +166,8 @@ buildKmpTable needle = V.create $ do
         if pos >= needleLen
           then return t
           else do
-            let wPos1 = S.index needle (pos-1)
-            let wCnd  = S.index needle cnd
+            let wPos1 = S.unsafeIndex needle (pos-1)
+            let wCnd  = S.unsafeIndex needle cnd
 
             if wPos1 == wCnd
               then do
diff --git a/test/snap-core-testsuite.cabal b/test/snap-core-testsuite.cabal
index 9c96685..79c7528 100644
--- a/test/snap-core-testsuite.cabal
+++ b/test/snap-core-testsuite.cabal
@@ -24,6 +24,7 @@ Executable testsuite
     attoparsec >= 0.8.1 && < 0.9,
     attoparsec-enumerator >= 0.2.0.3,
     base >= 4 && < 5,
+    base16-bytestring == 0.1.*,
     blaze-builder >= 0.2.1.4 && <0.3,
     bytestring,
     bytestring-nums,
diff --git a/test/suite/Snap/Iteratee/Tests.hs 
b/test/suite/Snap/Iteratee/Tests.hs
index e4fca4f..d693bb1 100644
--- a/test/suite/Snap/Iteratee/Tests.hs
+++ b/test/suite/Snap/Iteratee/Tests.hs
@@ -12,6 +12,7 @@ import           Control.Exception hiding (try, assert)
 import           Control.Monad
 import           Control.Monad.Identity
 import           Control.Monad.Trans
+import qualified Data.ByteString.Base16 as B16
 import           Data.ByteString.Char8 (ByteString)
 import qualified Data.ByteString.Char8 as S
 import qualified Data.ByteString.Lazy.Char8 as L
@@ -22,12 +23,14 @@ import           System.Timeout
 import           Test.Framework
 import           Test.Framework.Providers.QuickCheck2
 import           Test.QuickCheck
+import           Test.QuickCheck.Gen
 import qualified Test.QuickCheck.Monadic as QC
 import           Test.QuickCheck.Monadic hiding (run)
 import           Test.Framework.Providers.HUnit
 import qualified Test.HUnit as H
 
 import           Snap.Iteratee
+import           Snap.Internal.Iteratee.KnuthMorrisPratt
 import           Snap.Test.Common ()
 
 import Snap.Internal.Iteratee.Debug
@@ -74,6 +77,7 @@ tests = [ testEnumBS
         , testCountBytes2
         , testKillIfTooSlow1
         , testKillIfTooSlow2
+        , testKMP
         ]
 
 testEnumBS :: Test
@@ -400,6 +404,7 @@ testCountBytes = testProperty "iteratee/countBytes" $
        n = L.length s
 
 
+------------------------------------------------------------------------------
 testCountBytes2 :: Test
 testCountBytes2 = testProperty "iteratee/countBytes2" $
                   monadicIO $ forAllM arbitrary prop
@@ -423,6 +428,65 @@ testCountBytes2 = testProperty "iteratee/countBytes2" $
 
 
 ------------------------------------------------------------------------------
+testKMP :: Test
+testKMP = testProperty "iteratee/KnuthMorrisPratt" $
+          monadicIO $ forAllM arbitrary prop
+  where
+    prop :: (ByteString, [ByteString]) -> PropertyM IO ()
+    prop (needle', haystack') = do
+        let needle = B16.encode needle'
+        let haystack = Prelude.map B16.encode haystack'
+
+        let lneedle = L.fromChunks [needle]
+        let lhaystack = L.fromChunks haystack
+
+        pre ((not $ S.null needle) &&
+             (not $ L.null lhaystack) &&
+             (not $ S.isInfixOf needle (S.concat haystack)))
+
+        -- put the needle at the beginning, at the end, and somewhere in the
+        -- middle
+
+        lhay <- insertNeedle lneedle lhaystack
+        let stream = L.concat [lneedle, lhay]
+
+        -- there should be exactly three Matches
+        let iter = enumLBS stream $$ joinI (kmpEnumeratee needle $$ consume)
+        outp <- QC.run $ run_ iter
+
+        let nMatches = length $ filter isMatch outp
+
+        when (nMatches /= 3) $ QC.run $ do
+            putStrLn "got wrong number of matches!!"
+            putStrLn "needle:\n"
+            putStrLn $ show lneedle
+            putStrLn "\nhaystack:\n"
+            mapM_ (putStrLn . show) (L.toChunks stream)
+            putStrLn "\noutput stream:"
+            mapM_ (putStrLn . show) outp
+            putStrLn ""
+
+        assert $ nMatches == 3
+
+
+    isMatch (Match _) = True
+    isMatch _         = False
+
+    insertNeedle lneedle lhaystack = do
+        idxL  <- pick $ choose (0, lenL-1)
+        idxN  <- pick $ choose (0, lenN-1)
+        idxN2 <- pick $ choose (0, lenN-1)
+        let (l1, l2) = L.splitAt (toEnum idxL) lhaystack
+        let (n1, n2) = L.splitAt (toEnum idxN) lneedle
+        let (n3, n4) = L.splitAt (toEnum idxN2) lneedle
+
+        return $ L.concat [ l1, n1, n2, l2, n3, n4 ]
+
+      where
+        lenN = fromEnum $ L.length lneedle
+        lenL = fromEnum $ L.length lhaystack
+
+------------------------------------------------------------------------------
 testKillIfTooSlow1 :: Test
 testKillIfTooSlow1 = testCase "iteratee/killIfTooSlow1" $ do
     let iter = killIfTooSlow (return ()) 1000 4 consume
-----------------------------------------------------------------------


hooks/post-receive
-- 
snap-core
_______________________________________________
Snap mailing list
[email protected]
http://mailman-mail5.webfaction.com/listinfo/snap

Reply via email to