Hi,

Somebody told me about Parsec 3, which uses a Stream type class so it can parse any data type. This sounded like the right way to do encoding independent parsing, so I decided to see how it would work to parse UTF8 JSON.

Sadly I could not use Text.JSON.Parsec directly, because it uses the old Parsec CharParser type. So I copied to code, and also replaced p_number with the "floating" parser from Text.Parsec.Token, because Text.JSON.Parsec uses readFloat (a dirty hack imho) which works only on String.

If Text.JSON.Parsec was written for Parsec 3, the only thing to write to get UTF8 JSON parsing would be:

instance (Monad m, U.UTF8Bytes string index) => Stream (U.UTF8 string) m Char where
    uncons = return . U.uncons

I did not do any performance measuring yet, I was glad I got it working. Any comments on the code is appreciated!

greetings,
Sjoerd Visscher

{-# LANGUAGE FlexibleInstances, MultiParamTypeClasses, UndecidableInstances #-}
import qualified Data.String.UTF8 as U
import qualified Data.ByteString as B

import Text.Parsec hiding (many, optional, (<|>))
import Control.Applicative

import Text.JSON.Types
import Control.Monad
import Data.Char
import Numeric

instance (Monad m, U.UTF8Bytes string index) => Stream (U.UTF8 string) m Char where
    uncons = return . U.uncons

type CharParser st = Parsec (U.UTF8 B.ByteString) st

parseFile :: FilePath -> IO (Either ParseError JSValue)
parseFile fileName = do
  bs <- B.readFile fileName
  return $ runParser json () fileName (U.fromRep bs)

parseString      :: String -> Either ParseError JSValue
parseString s     = runParser json () "(unknown)" (U.fromString s)

json             :: CharParser () JSValue
json              = spaces *> p_value

tok              :: CharParser () a -> CharParser () a
tok p             = p <* spaces

p_value          :: CharParser () JSValue
p_value           =  (JSNull      <$  p_null)
                 <|> (JSBool      <$> p_boolean)
                 <|> (JSArray     <$> p_array)
                 <|> (JSString    <$> p_js_string)
                 <|> (JSObject    <$> p_js_object)
                 <|> (JSRational False <$> p_number)
                 <?> "JSON value"

p_null           :: CharParser () ()
p_null            = tok (string "null") >> return ()

p_boolean        :: CharParser () Bool
p_boolean         = tok
                      (  (True  <$ string "true")
                     <|> (False <$ string "false")
                      )

p_array          :: CharParser () [JSValue]
p_array           = between (tok (char '[')) (tok (char ']'))
                  $ p_value `sepBy` tok (char ',')

p_string         :: CharParser () String
p_string          = between (tok (char '"')) (char '"') (many p_char)
  where p_char    =  (char '\\' >> p_esc)
                 <|> (satisfy (\x -> x /= '"' && x /= '\\'))

        p_esc     =  ('"'   <$ char '"')
                 <|> ('\\'  <$ char '\\')
                 <|> ('/'   <$ char '/')
                 <|> ('\b'  <$ char 'b')
                 <|> ('\f'  <$ char 'f')
                 <|> ('\n'  <$ char 'n')
                 <|> ('\r'  <$ char 'r')
                 <|> ('\t'  <$ char 't')
                 <|> (char 'u' *> p_uni)
                 <?> "escape character"

        p_uni     = check =<< count 4 (satisfy isHexDigit)
          where check x | code <= max_char  = pure (toEnum code)
                        | otherwise         = empty
                  where code      = fst $ head $ readHex x
                        max_char  = fromEnum (maxBound :: Char)

p_object         :: CharParser () [(String,JSValue)]
p_object          = between (tok (char '{')) (tok (char '}'))
                  $ p_field `sepBy` tok (char ',')
  where p_field   = (,) <$> (p_string <* tok (char ':')) <*> p_value

p_number         :: CharParser () Rational
p_number          = tok floating where

    floating       :: CharParser () Rational
    floating        = do{ n <- decimal
                        ; fract <- option 0 fraction
                        ; expo  <- option 1 exponent'
                        ; return ((fromInteger n + fract)*expo)
                        }

    fraction        = do{ char '.'
                        ; digits <- many1 digit <?> "fraction"
                        ; return (foldr op 0 digits)
                        }
                      <?> "fraction"
                    where
                      op d f    = (f + fromIntegral (digitToInt d))/10

    exponent'       = do{ oneOf "eE"
                        ; f <- sign
                        ; e <- decimal <?> "exponent"
                        ; return (power (f e))
                        }
                      <?> "exponent"
                    where
                       power e  | e < 0      = 1/power(-e)
                                | otherwise  = fromInteger (10^e)

    sign            =   (char '-' >> return negate)
                    <|> (char '+' >> return id)
                    <|> return id

    decimal         = number 10 digit

    number base baseDigit
        = do{ digits <- many1 baseDigit
; let n = foldl (\x d -> base*x + toInteger (digitToInt d)) 0 digits
            ; seq n (return n)
            }


p_js_string      :: CharParser () JSString
p_js_string       = toJSString <$> p_string

p_js_object      :: CharParser () (JSObject JSValue)
p_js_object       = toJSObject <$> p_object


_______________________________________________
Haskell-Cafe mailing list
Haskell-Cafe@haskell.org
http://www.haskell.org/mailman/listinfo/haskell-cafe

Reply via email to