This isn't a bug but a suggestion to generalize the
type of Parser in the hugs/ParseLib.hs module.
I've used this set of parser combinators a bit and
have used them happily. However, I could have been
happier:
It seemed that I was very prone to introduce errors
that were difficult to track down and were more often
than not related to a _lexical_ bug rather than a
_parsing_ bug. But, of course, these two phases
are merged in this library. However, this need not
be the case at all; it is trivial to generalize the
type of Parser as follows:
< newtype Parser a = P (String -> [(a,String)])
---
> newtype Parser a b = P ([a] -> [(b,[a])])
(And of course a few dozen other type signatures
need to be changed.)
And then, _if_one_wants_, one can write one's own
lexer. I have found this approach has made my life
easier.
For possible inclusion in the next release or
for those interested, I've attached the generalized
version of this module.
(If one uses this more general version, "char" becomes
a misnomer, maybe "token" would be a better name. Similarly,
"string" would be misnamed.)
- Mark Tullsen
{-----------------------------------------------------------------------------
A LIBRARY OF MONADIC PARSER COMBINATORS
29th July 1996
Revised, October 1996
Revised again, November 1998
Graham Hutton Erik Meijer
University of Nottingham University of Utrecht
This Haskell 98 script defines a library of parser combinators, and is taken
from sections 1-6 of our article "Monadic Parser Combinators". Some changes
to the library have been made in the move from Gofer to Haskell:
* Do notation is used in place of monad comprehension notation;
* The parser datatype is defined using "newtype", to avoid the overhead
of tagging and untagging parsers with the P constructor.
-----------------------------------------------------------------------------}
module ParseLib
(Parser, item, papply, (+++), sat, many, many1, sepby, sepby1, chainl,
chainl1, chainr, chainr1, ops, bracket, char, digit, lower, upper,
letter, alphanum, string, ident, nat, int, spaces, comment, junk,
parse, token, natural, integer, symbol, identifier, module Monad) where
import Char
import Monad
infixr 5 +++
--- The parser monad ---------------------------------------------------------
newtype Parser a b = P ([a] -> [(b,[a])])
instance Functor (Parser c) where
-- fmap :: (a -> b) -> (Parser c a -> Parser c b)
fmap f (P p) = P (\inp -> [(f v, out) | (v,out) <- p inp])
instance Monad (Parser c) where
-- return :: a -> Parser c a
return v = P (\inp -> [(v,inp)])
-- >>= :: Parser c a -> (a -> Parser c b) -> Parser c b
(P p) >>= f = P (\inp -> concat [papply (f v) out | (v,out) <- p inp])
instance MonadPlus (Parser c) where
-- mzero :: Parser c a
mzero = P (\inp -> [])
-- mplus :: Parser c a -> Parser c a -> Parser c a
(P p) `mplus` (P q) = P (\inp -> (p inp ++ q inp))
--- Other primitive parser combinators ---------------------------------------
item :: Parser a a
item = P (\inp -> case inp of
[] -> []
(x:xs) -> [(x,xs)])
force :: Parser a b -> Parser a b
force (P p) = P (\inp -> let x = p inp in
(fst (head x), snd (head x)) : tail x)
first :: Parser a b -> Parser a b
first (P p) = P (\inp -> case p inp of
[] -> []
(x:xs) -> [x])
papply :: Parser a b -> [a] -> [(b,[a])]
papply (P p) inp = p inp
--- Derived combinators ------------------------------------------------------
(+++) :: Parser a b -> Parser a b -> Parser a b
p +++ q = first (p `mplus` q)
sat :: (a -> Bool) -> Parser a a
sat p = do {x <- item; if p x then return x else mzero}
many :: Parser a b -> Parser a [b]
many p = force (many1 p +++ return [])
many1 :: Parser a b -> Parser a [b]
many1 p = do {x <- p; xs <- many p; return (x:xs)}
sepby :: Parser a b -> Parser a c -> Parser a [b]
p `sepby` sep = (p `sepby1` sep) +++ return []
sepby1 :: Parser a b -> Parser a c -> Parser a [b]
p `sepby1` sep = do {x <- p; xs <- many (do {sep; p}); return (x:xs)}
chainl :: Parser a b -> Parser a (b -> b -> b) -> b -> Parser a b
chainl p op v = (p `chainl1` op) +++ return v
chainl1 :: Parser a b -> Parser a (b -> b -> b) -> Parser a b
p `chainl1` op = do {x <- p; rest x}
where
rest x = do {f <- op; y <- p; rest (f x y)}
+++ return x
chainr :: Parser a b -> Parser a (b -> b -> b) -> b -> Parser a b
chainr p op v = (p `chainr1` op) +++ return v
chainr1 :: Parser a b -> Parser a (b -> b -> b) -> Parser a b
p `chainr1` op = do {x <- p; rest x}
where
rest x = do {f <- op; y <- p `chainr1` op; return (f x y)}
+++ return x
ops :: [(Parser a b,c)] -> Parser a c
ops xs = foldr1 (+++) [do {p; return op} | (p,op) <- xs]
bracket :: Parser a b -> Parser a c -> Parser a d -> Parser a c
bracket open p close = do {open; x <- p; close; return x}
--- Useful parsers -----------------------------------------------------------
char :: Eq a => a -> Parser a a
char x = sat (\y -> x == y)
digit :: Parser Char Char
digit = sat isDigit
lower :: Parser Char Char
lower = sat isLower
upper :: Parser Char Char
upper = sat isUpper
letter :: Parser Char Char
letter = sat isAlpha
alphanum :: Parser Char Char
alphanum = sat isAlphaNum
string :: Eq a => [a] -> Parser a [a]
string [] = return []
string (x:xs) = do {char x; string xs; return (x:xs)}
ident :: Parser Char String
ident = do {x <- lower; xs <- many alphanum; return (x:xs)}
nat :: Parser Char Int
nat = do {x <- digit; return (digitToInt x)} `chainl1` return op
where
m `op` n = 10*m + n
int :: Parser Char Int
int = do {char '-'; n <- nat; return (-n)} +++ nat
--- Lexical combinators ------------------------------------------------------
spaces :: Parser Char ()
spaces = do {many1 (sat isSpace); return ()}
comment :: Parser Char ()
comment = do {string "--"; many (sat (\x -> x /= '\n')); return ()}
junk :: Parser Char ()
junk = do {many (spaces +++ comment); return ()}
parse :: Parser Char a -> Parser Char a
parse p = do {junk; p}
token :: Parser Char a -> Parser Char a
token p = do {v <- p; junk; return v}
--- Token parsers ------------------------------------------------------------
natural :: Parser Char Int
natural = token nat
integer :: Parser Char Int
integer = token int
symbol :: String -> Parser Char String
symbol xs = token (string xs)
identifier :: [String] -> Parser Char String
identifier ks = token (do {x <- ident; if not (elem x ks) then return x
else mzero})
------------------------------------------------------------------------------