I parse them into a list of elements:
(* Flat element produced when scanning an XML fragment.
 * NOTE(review): the constructor meanings below are inferred from their names
 * only; confirm against xml.ur.
 *   Text  - presumably a run of character data.
 *   Open  - presumably an opening tag: its name, a list of N/V records
 *           (likely attribute name/value pairs), and an optional string
 *           whose purpose is not visible here.
 *   Close - presumably a closing tag with its name.
 *   Other - anything else. *)
datatype myXml =
Text of string
| Open of string * list { N : string, V : string } * option string
| Close of string
| Other
I've attached the related modules, but the main problem is that I don't want to write a full XML parser (dealing with entities, for example). The current code does not cover all cases (it is dirty and seems to have silly bugs, even with escaping; I have used it on only two test files, just to work on the UI part). It is probably better not to invest time in optimization...
12.10.2012, 16:53, "Adam Chlipala" <[email protected]>:
OK, then I'd expect to parse fragments into a simple tree datatype.
Maybe if you point us to an example of your parsing code, I can give some advice on making it faster. (Linear-time/space parsing of strings in pure Ur code should be pretty easy, if you use the right standard library functions.)
(** Parsec. *)
(* A parser for values of type t is a function from the input string to
 * either None (failure) or Some (value, remaining input). *)
(*type parser t = string -> option (t * string)*)
con parser t = string -> option (t * string)
(* Monad instance for parsers.
 * Return succeeds with the given value and leaves the input untouched.
 * Bind runs the first parser; on success its result selects the second
 * parser, which is then run on the remaining input. A failure of the first
 * parser is propagated as None. *)
val parser_monad = @@mkMonad [fn t::Type => parser t] {
    Return = fn [t:::Type] (x : t) =>
                (fn (input : string) => Some (x, input)) : parser t,
    Bind = fn [t1:::Type] [t2:::Type] (first : parser t1) (next : t1 -> parser t2) =>
              (fn input =>
                  case first input of
                      Some (x, rest) => next x rest
                    | None => None) : parser t2
}
(* Everything in s after its first character. *)
fun tail (s : string) : string = strsuffix s 1

(* The first character of s. *)
fun head (s : string) : char = strsub s 0

(* True exactly when s does not contain at least one character. *)
fun empty (s : string) : bool = if strlenGe s 1 then False else True
(* Run parser p on s and return only its result; whatever input is left
 * over after a successful parse is discarded. Returns None when p fails. *)
fun parse [t ::: Type] (p : parser t) (s : string) : option t =
    case p s of
        None => None
      | Some (result, _) => Some result
(* Succeeds, consuming nothing, only when the input is empty. *)
fun eof (s : string) : option ({} * string) =
    case empty s of
        True => Some ((), s)
      | False => None

(* Always succeeds and consumes nothing. *)
fun ok (s : string) : option ({} * string) = Some ((), s)

(* Always fails, whatever the input. *)
val fail [t ::: Type] _ = (None : option (t * string))
(* Consume one character if it satisfies the given predicate and return it
 * as a one-character string. Fails on empty input or when the predicate
 * rejects the first character. *)
val pred (test : char -> bool) : parser string = fn (s : string) =>
    if not (strlenGe s 1) then
        None
    else
        let
            val c = head s
        in
            if test c then Some (str1 c, tail s) else None
        end
(* Match the literal prefix at the start of the input. On success the result
 * is the prefix itself and the remaining input is what follows it. *)
val string (prefix : string) : parser string = fn input =>
    case String.isPrefix {Full = input, Prefix = prefix} of
        True => Some (prefix, strsuffix input (strlen prefix))
      | False => None
(* Like string, but the matched text is dropped and the unit record is
 * returned instead. *)
val string_ prefix : parser {} = fn input =>
    case string prefix input of
        None => None
      | Some (_, rest) => Some ({}, rest)
(*val string_ s : parser {} = _i <- string s; return ()*)
(* Consume any single character and return it as a one-character string.
 * Fails only on empty input. *)
fun any (s : string) : option (string * string) =
    case empty s of
        True => None
      | False => Some (str1 (head s), tail s)
(* Consume one UTF-8 encoded code point and return its bytes as a string.
 * The sequence length is chosen from the value of the first byte:
 *   below 0x80                    -> 1 byte
 *   0xC0 up to (not incl.) 0xE0   -> 2 bytes
 *   0xE0 up to (not incl.) 0xF0   -> 3 bytes
 *   0xF0 up to (not incl.) 0xF8   -> 4 bytes
 * Fails on empty input, on any other first byte (for example a stray
 * continuation byte), and when the input is shorter than the sequence.
 * Only the length is checked; the following bytes are not validated. *)
val char s =
    let
        (* Split off the first k bytes of s, failing when s is shorter. *)
        fun take k =
            if strlenGe s k then Some (substring s 0 k, strsuffix s k) else None
    in
        if empty s then
            None
        else
            let
                val n = Basis.ord (head s)
            in
                if n < 8*16 then take 1
                else if n >= 12*16 && n < 14*16 then take 2
                else if n >= 14*16 && n < 15*16 then take 3
                else if n >= 15*16 && n < 15*16+8 then take 4
                else None
            end
    end
(* Consume the first character of the input and return it as a char.
 * Fails on empty input. *)
fun byte (s : string) : option (char * string) =
    case empty s of
        True => None
      | False => Some (head s, tail s)
(* Apply p repeatedly until it fails and collect the results in order.
 * Zero matches yields the empty list with the input unchanged.
 * The recursion only stops when p fails, so p is expected to fail
 * eventually on the remaining input. *)
fun many [t ::: Type] (p : parser t) (s : string) : option (list t * string) =
    case p s of
        None => Some ([], s)
      | Some (first, rest) =>
        (case many p rest of
             None => None
           | Some (others, leftover) => Some (first :: others, leftover))
(* Consume the longest leading run of characters that satisfy the predicate.
 * Always succeeds; the matched run may be the empty string. *)
val while test s =
    let
        (* Count how many leading characters of rest pass the predicate,
         * starting from n. *)
        fun count rest n =
            if rest = "" then n
            else if test (head rest) then count (tail rest) (n + 1)
            else n

        val len = count s 0
    in
        Some (substring s 0 len, strsuffix s len)
    end
(* Like while, but fails when no character at all was matched. *)
val while1 test = fn s =>
    case while test s of
        Some (matched, rest) =>
        if strlenGe matched 1 then Some (matched, rest) else None
      | None => None
(* Like while, but steps through the input one code point at a time (as
 * split off by char) and passes each one to the predicate as a string.
 * Scanning stops at the first code point the predicate rejects or where
 * char fails. Always succeeds; the matched run may be empty. *)
val whileChar test s =
    let
        (* Total length, as measured by strlen, of the accepted code
         * points at the start of rest, added to n. *)
        fun measure rest n =
            case char rest of
                Some (c, rest') =>
                if test c then measure rest' (n + strlen c) else n
              | None => n

        val len = measure s 0
    in
        Some (substring s 0 len, strsuffix s len)
    end
(* Alternative: try p1 first; if it fails, run p2 on the same input. *)
val or [t ::: Type] (p1 : parser t) (p2 : parser t) : parser t = fn input =>
    case p1 input of
        None => p2 input
      | Some r => Some r
(* Lookahead: succeeds without consuming any input exactly when p would
 * succeed here. The result of p is dropped and "" is returned instead.
 * TODO: a string result type is not ideal here. *)
val before [t ::: Type] (p : parser t) = fn (input : string) =>
    case (p input : option (t * string)) of
        None => None
      | Some _ => Some ("", input)
(* Fixed point for parsers: builds a parser that may refer to itself.
 * The argument receives the parser being defined and returns its body. *)
fun fix [t ::: Type] (pt : parser t -> parser t) : parser t =
    let
        fun self (s : string) : option (t * string) = pt self s
    in
        self
    end
(* Run p on s and return its raw outcome: both the result and the
 * remaining input on success, None on failure. *)
fun seeTail [t ::: Type] (p : parser t) (s : string) : option (t * string) = p s
(*val sepBy1 [t:::Type] [t1:::Type] (sep : parser t1) (p : parser t) = fix (fn y => l <- p; ls <- or (_l <- sep; y) (return []); return l::ls)
val sepBy sep p = or (sepBy1 sep p) (return [])*)
(* Optional parser: never fails. Yields Some r when p succeeds with r, and
 * None with the input left untouched when p fails. *)
val try [t ::: Type] (p : parser t) = fn (input : string) =>
    case p input of
        None => Some (None, input)
      | Some (r, rest) => Some (Some r, rest)
(* Drop characters from the front of the input until it starts with prefix
 * or becomes empty. Always succeeds; the prefix itself is not consumed.
 * If prefix never occurs, the remaining input is the empty string. *)
fun skipTo prefix (s : string) =
    if strlenGe s 1 then
        (if String.isPrefix {Full = s, Prefix = prefix} then
             Some ((), s)
         else
             skipTo prefix (tail s))
    else
        Some ((), s)
(* TODO:
* utf-8 --- DONE
* more combinators
* Overload strings and lists: class Parseable with fast tail*)
(* Interface of the parser combinator module. The parser type is abstract
 * here; earlier attempts at exposing its definition are kept as comments. *)
(*con parser t = string -> option (t * string)*)
con parser :: Type -> Type (*= fn t :: Type => string -> option (t * string)*)
(*con parser = [t::Type] => string -> option (t * string)*)
(* Monad instance, so parsers can be sequenced with bind and return. *)
val parser_monad : monad parser
(* Run a parser on a string; the remaining input is discarded. *)
val parse : t:::Type -> parser t -> string -> option t
(* Succeeds only on empty input. *)
val eof : parser {}
(* Always succeeds without consuming input. *)
val ok : parser {}
(* Always fails. *)
val fail : t:::Type -> parser t
(* One character satisfying the predicate, returned as a string. *)
val pred : (char -> bool) -> parser string
val char : parser string (* take utf-8 codepoint*)
(* Match a literal string at the start of the input and return it. *)
val string : string -> parser string
(* Same as string, but the matched text is discarded. *)
val string_: string -> parser {}
(* Any one character, returned as a string; fails on empty input. *)
val any : parser string
(* Any one character, returned as a char; fails on empty input. *)
val byte : parser char
(* Try the first parser; if it fails, try the second on the same input. *)
val or : t:::Type -> parser t -> parser t -> parser t
(* Lookahead: succeeds with "" and consumes nothing when the parser matches. *)
val before : t:::Type -> parser t -> parser string
(* Zero or more repetitions, results collected in order. *)
val many : t:::Type -> parser t -> parser (list t)
(*val sepBy : t:::Type -> t1:::Type -> parser t1 -> parser t -> parser (list t)
val sepBy1 : t:::Type -> t1:::Type -> parser t1 -> parser t -> parser (list t)*)
(* Longest leading run of characters satisfying the predicate; may be empty. *)
val while : (char -> bool) -> parser string
(* Like while, but fails when the run is empty. *)
val while1 : (char -> bool) -> parser string
(* Like while, but the predicate sees one UTF-8 code point at a time. *)
val whileChar : (string -> bool) -> parser string
(* Build a self-referential parser from a function of the parser itself. *)
val fix : t:::Type -> (parser t -> parser t) -> parser t
(* Run a parser and expose both its result and the remaining input. *)
val seeTail : t:::Type -> parser t -> string -> option (t*string)
(* Optional parser: never fails, yields None when the parser does not match. *)
val try : t:::Type -> parser t -> parser (option t)
(* Skip input until it starts with the given string or is exhausted. *)
val skipTo : string -> parser {}
xml.ur
Description: Binary data
util.ur
Description: Binary data
_______________________________________________ Ur mailing list [email protected] http://www.impredicative.com/cgi-bin/mailman/listinfo/ur
