Re: [Haskell-cafe] please comment on my parser, can I do this cleaner?

2009-06-09 Thread Thomas Hartman
Thanks. It seems my original parser also works against FOO,BAR,BAZ if
you only modify

atom = string ","
           <|> ( many1 $ noneOf "()<>," ) -- add ,

Indeed, what to call the "thingies" in a parser is a source of some
personal consternation.

What is a token, what is an atom, what is an expr? It all seems to be
somewhat ad hoc.

> 2009/6/9 Daniel Fischer :
>> Am Dienstag 09 Juni 2009 20:29:09 schrieb Thomas Hartman:
>>> All I want to do is split on commas, but not the commas inside () or <>
>>> tags.
>>>
>>> I have been wanting to master parsec for a long time and this simple
>>> exercise looked like a good place to start.
>>>
>>> The code below does the right thing. Am I missing any tricks to make
>>> this simpler/neater?
>>>
>>> Thanks, thomas.
>>>
>>> thart...@ubuntu:~/perlArena>cat splitEm.
>>> splitEm.hs   splitEm.hs~  splitEm.pl   splitEm.pl~
>>> thart...@ubuntu:~/perlArena>cat splitEm.hs
>>> {-# LANGUAGE ScopedTypeVariables #-}
>>> import Text.ParserCombinators.Parsec
>>> import Text.ParserCombinators.Parsec.Char
>>> import Text.PrettyPrint (vcat, render, text)
>>> import Data.List.Split hiding (sepBy, chunk)
>>> import Text.ParserCombinators.Parsec.Token
>>>
>>> import Debug.Trace
>>> import Debug.Trace.Helpers
>>>
>>> -- this works, but is there a neater/cleaner way?
>>> main = ripInputsXs (toEof splitter) "splitter" [ goodS, badS ]
>>>
>>> -- I need a way to split on commas, but not the commas inside '<>' or
>>> '()' characters
>>> goodS = "<*2>FOO<2,1>,<*3>(SigB<8:0:2>,BAR),<*2>Siga<2:0>,Sigb<8,7,6,5,0>"
>>> badS = "<*2)FOO<2,1>,<*3>(SigB<8:0:2>,BAR),<*2>Siga<2:0>,Sigb<8,7,6,5,0>"
>>> -- the first < matches a ), so reject this
>>>
>>>
>>> splitter = do
>>>   chunks :: [String] <- toEof (many chunk)
>>>   let pieces = map concat $ splitOn [","] chunks
>>>   return pieces -- chunks
>>>   where
>>>     atom = string ","
>>>            <|> ( many1 $ noneOf "()<>" )
>>>     chunk = parenExpr <|>  atom
>>
>> I think that does not do what you want.
>>
>> For input "FOO,BAR,BAZ", chunks is ["FOO,BAR,BAZ"], that won't be split; as 
>> far as I can
>> see, it splits only on commas directly following a parenExpr (or at the 
>> beginning of the
>> input or directly following another splitting comma).
>>
>>>     parenExpr :: GenParser Char st [Char]
>>>     parenExpr = let paren p = betweenInc (char '(' ) (char ')' ) p
>>>                                 <|> betweenInc (char '<' ) (char '>' )
>>> p
>>>                 in paren $ option "" $ do ps <- many1 $ parenExpr <|> atom
>>>                                           return . concat $ ps
>>>
>>> betweenInc o' c' p' = do
>>>   o <- o'
>>>   p <- p'
>>>   c <- c'
>>>   return $ [o] ++ p ++ [c]
>>>
>>> toEof p' = do
>>>         r <- p'
>>>         eof
>>>         return r
>>>
>>>
>>>
>>>
>>>
>>>
>>> ripInputs prs prsName xs = mapM_ (putStrLn . show . parse prs prsName ) xs
>>> ripInputsXs prs prsName xs = mapM_ (putStrLn . showXs . parse prs prsName )
>>> xs where showXs v = case v of
>>>           Left e -> show e
>>>           Right xs -> render . vcat . map text $ xs
>>
>> I can offer (sorry for the names, and I don't know if what that does is 
>> really what you
>> want):
>>
>>
>> keepSepBy :: Parser a -> Parser a -> Parser [a]
>> keepSepBy p sep = (do
>>    r <- p
>>    (do s <- sep
>>        xs <- keepSepBy p sep
>>        return (r:s:xs)) <|> return [r])
>>    <|> return []
>>
>> twain :: Parser a -> Parser a -> Parser [a] -> Parser [a]
>> twain open close list = do
>>    o <- open
>>    l <- list
>>    c <- close
>>    return (o:l++[c])
>>
>> comma :: Parser String
>> comma = string ","
>>
>> simpleChar :: Parser Char
>> simpleChar = noneOf "<>(),"
>>
>> suite :: Parser String
>> suite = many1 simpleChar
>>
>> atom :: Parser String
>> atom = fmap concat $ many1 (parenExp <|> suite)
>>
>> parenGroup :: Parser String
>> parenGroup = fmap concat $ keepSepBy atom comma
>>
>> parenExp :: Parser String
>> parenExp = twain (char '<') (char '>') parenGroup
>>            <|> twain (char '(') (char ')') parenGroup
>>
>> chunks :: Parser [String]
>> chunks = sepBy atom comma
>>
>> splitter = do
>>    cs <- chunks
>>    eof
>>    return cs
>>
>> goodS = "<*2>FOO<2,1>,<*3>(SigB<8:0:2>,BAR),<*2>Siga<2:0>,Sigb<8,7,6,5,0>"
>> badS = "<*2)FOO<2,1>,<*3>(SigB<8:0:2>,BAR),<*2>Siga<2:0>,Sigb<8,7,6,5,0>"
>>
>> goodRes = parse splitter "splitter" goodS
>> badRes = parse splitter "splitter" badS
>>
>>
>> ___
>> Haskell-Cafe mailing list
>> Haskell-Cafe@haskell.org
>> http://www.haskell.org/mailman/listinfo/haskell-cafe
>>
>
___
Haskell-Cafe mailing list
Haskell-Cafe@haskell.org
http://www.haskell.org/mailman/listinfo/haskell-cafe


Re: [Haskell-cafe] please comment on my parser, can I do this cleaner?

2009-06-09 Thread Daniel Fischer
Am Dienstag 09 Juni 2009 20:29:09 schrieb Thomas Hartman:
> All I want to do is split on commas, but not the commas inside () or <>
> tags.
>
> I have been wanting to master parsec for a long time and this simple
> exercise looked like a good place to start.
>
> The code below does the right thing. Am I missing any tricks to make
> this simpler/neater?
>
> Thanks, thomas.
>
> thart...@ubuntu:~/perlArena>cat splitEm.
> splitEm.hs   splitEm.hs~  splitEm.pl   splitEm.pl~
> thart...@ubuntu:~/perlArena>cat splitEm.hs
> {-# LANGUAGE ScopedTypeVariables #-}
> import Text.ParserCombinators.Parsec
> import Text.ParserCombinators.Parsec.Char
> import Text.PrettyPrint (vcat, render, text)
> import Data.List.Split hiding (sepBy, chunk)
> import Text.ParserCombinators.Parsec.Token
>
> import Debug.Trace
> import Debug.Trace.Helpers
>
> -- this works, but is there a neater/cleaner way?
> main = ripInputsXs (toEof splitter) "splitter" [ goodS, badS ]
>
> -- I need a way to split on commas, but not the commas inside '<>' or
> '()' characters
> goodS = "<*2>FOO<2,1>,<*3>(SigB<8:0:2>,BAR),<*2>Siga<2:0>,Sigb<8,7,6,5,0>"
> badS = "<*2)FOO<2,1>,<*3>(SigB<8:0:2>,BAR),<*2>Siga<2:0>,Sigb<8,7,6,5,0>"
> -- the first < matches a ), so reject this
>
>
> splitter = do
>   chunks :: [String] <- toEof (many chunk)
>   let pieces = map concat $ splitOn [","] chunks
>   return pieces -- chunks
>   where
> atom = string ","
><|> ( many1 $ noneOf "()<>" )
> chunk = parenExpr <|>  atom

I think that does not do what you want.

For input "FOO,BAR,BAZ", chunks is ["FOO,BAR,BAZ"], that won't be split; as far 
as I can 
see, it splits only on commas directly following a parenExpr (or at the 
beginning of the 
input or directly following another splitting comma).

> parenExpr :: GenParser Char st [Char]
> parenExpr = let paren p = betweenInc (char '(' ) (char ')' ) p
> <|> betweenInc (char '<' ) (char '>' )
> p
> in paren $ option "" $ do ps <- many1 $ parenExpr <|> atom
>   return . concat $ ps
>
> betweenInc o' c' p' = do
>   o <- o'
>   p <- p'
>   c <- c'
>   return $ [o] ++ p ++ [c]
>
> toEof p' = do
> r <- p'
> eof
> return r
>
>
>
>
>
>
> ripInputs prs prsName xs = mapM_ (putStrLn . show . parse prs prsName ) xs
> ripInputsXs prs prsName xs = mapM_ (putStrLn . showXs . parse prs prsName )
> xs where showXs v = case v of
>   Left e -> show e
>   Right xs -> render . vcat . map text $ xs

I can offer (sorry for the names, and I don't know if what that does is really 
what you 
want):


keepSepBy :: Parser a -> Parser a -> Parser [a]
keepSepBy p sep = (do
r <- p
(do s <- sep
xs <- keepSepBy p sep
return (r:s:xs)) <|> return [r])
<|> return []

twain :: Parser a -> Parser a -> Parser [a] -> Parser [a]
twain open close list = do
o <- open
l <- list
c <- close
return (o:l++[c])

comma :: Parser String
comma = string ","

simpleChar :: Parser Char
simpleChar = noneOf "<>(),"

suite :: Parser String
suite = many1 simpleChar

atom :: Parser String
atom = fmap concat $ many1 (parenExp <|> suite)

parenGroup :: Parser String
parenGroup = fmap concat $ keepSepBy atom comma

parenExp :: Parser String
parenExp = twain (char '<') (char '>') parenGroup
<|> twain (char '(') (char ')') parenGroup

chunks :: Parser [String]
chunks = sepBy atom comma

splitter = do
cs <- chunks
eof
return cs

goodS = "<*2>FOO<2,1>,<*3>(SigB<8:0:2>,BAR),<*2>Siga<2:0>,Sigb<8,7,6,5,0>"
badS = "<*2)FOO<2,1>,<*3>(SigB<8:0:2>,BAR),<*2>Siga<2:0>,Sigb<8,7,6,5,0>"

goodRes = parse splitter "splitter" goodS
badRes = parse splitter "splitter" badS


___
Haskell-Cafe mailing list
Haskell-Cafe@haskell.org
http://www.haskell.org/mailman/listinfo/haskell-cafe


[Haskell-cafe] please comment on my parser, can I do this cleaner?

2009-06-09 Thread Thomas Hartman
All I want to do is split on commas, but not the commas inside () or <> tags.

I have been wanting to master parsec for a long time and this simple
exercise looked like a good place to start.

The code below does the right thing. Am I missing any tricks to make
this simpler/neater?

Thanks, thomas.

thart...@ubuntu:~/perlArena>cat splitEm.
splitEm.hs   splitEm.hs~  splitEm.pl   splitEm.pl~
thart...@ubuntu:~/perlArena>cat splitEm.hs
{-# LANGUAGE ScopedTypeVariables #-}
import Text.ParserCombinators.Parsec
import Text.ParserCombinators.Parsec.Char
import Text.PrettyPrint (vcat, render, text)
import Data.List.Split hiding (sepBy, chunk)
import Text.ParserCombinators.Parsec.Token

import Debug.Trace
import Debug.Trace.Helpers

-- this works, but is there a neater/cleaner way?
main = ripInputsXs (toEof splitter) "splitter" [ goodS, badS ]

-- I need a way to split on commas, but not the commas inside '<>' or
'()' characters
goodS = "<*2>FOO<2,1>,<*3>(SigB<8:0:2>,BAR),<*2>Siga<2:0>,Sigb<8,7,6,5,0>"
badS = "<*2)FOO<2,1>,<*3>(SigB<8:0:2>,BAR),<*2>Siga<2:0>,Sigb<8,7,6,5,0>"
-- the first < matches a ), so reject this


splitter = do
  chunks :: [String] <- toEof (many chunk)
  let pieces = map concat $ splitOn [","] chunks
  return pieces -- chunks
  where
atom = string ","
   <|> ( many1 $ noneOf "()<>" )
chunk = parenExpr <|>  atom
parenExpr :: GenParser Char st [Char]
parenExpr = let paren p = betweenInc (char '(' ) (char ')' ) p
<|> betweenInc (char '<' ) (char '>' )
p
in paren $ option "" $ do ps <- many1 $ parenExpr <|> atom
  return . concat $ ps

betweenInc o' c' p' = do
  o <- o'
  p <- p'
  c <- c'
  return $ [o] ++ p ++ [c]

toEof p' = do
r <- p'
eof
return r






ripInputs prs prsName xs = mapM_ (putStrLn . show . parse prs prsName ) xs
ripInputsXs prs prsName xs = mapM_ (putStrLn . showXs . parse prs prsName ) xs
  where showXs v = case v of
  Left e -> show e
  Right xs -> render . vcat . map text $ xs
___
Haskell-Cafe mailing list
Haskell-Cafe@haskell.org
http://www.haskell.org/mailman/listinfo/haskell-cafe