module ParseXML where

import Maybe
import Char 
import ClassSymbol
import ClassParsing
import LL1Parsing
import DataXML


{-
 - This module provides a module that parses XML,
 - from a string into a XML object.
 -}

--parseXML :: String -> Either String XMLDoc
parseXML input = 
  case parse3XML stream [("",[],([],[]))] of
    [("",_,(tags,[]))]  -> 
	case [ tag | XMLElement (tag@(XMLTag name _ _)) <- tags,
		name /= "XML" ] of
	   [x] -> Right (XMLDoc [] x)
	   _ -> error "Ugg"
   where
	tokens = tokenizeXML input
	stream = fst (parse parseElements tokens)
	
-- 

--Left "no Parser"

-- ==========================================================================--
-- The tokenizer							    --
-- ==========================================================================--
data XMLToken
	= XMLSpecialChar Int Char	-- special chars, like ()[], 
					-- with their line numbers.
	| XMLChars       Int [Char]		-- a sequence of chars
	| XMLName        Int [Char]		-- a name
	| XMLString	 Int String		-- "string"

instance Eq XMLToken where
	x == y = fromEnum x == fromEnum y

instance Ord XMLToken where
	compare x y = compare (fromEnum x) (fromEnum y)

charTokenSpecials = "<>/?=\" "

{-
 -  States: N S T
 -  N -> T	if <
 -  T -> S	if "
 -  S -> T	if "
 -  T -> N	if >  
 -
 - There are not nested tags or nested strings!
 -}

tokenizeXML :: String -> [XMLToken]
tokenizeXML str = tokXML str LexN [] 1

data LexState = LexN | LexS | LexT | LexC deriving Show

mkXML LexN []    more n = more
mkXML LexN chrs  more n = XMLChars  n (reverse chrs) : more
mkXML LexT []    more n = more
mkXML LexT chrs  more n = XMLName   n (reverse chrs) : more
-- For strings, we allow empty strings!
mkXML LexS chrs  more n = XMLString n (reverse chrs) : more
mkXML LexC chrs  more n = more

mkSpec c n = XMLSpecialChar n c
------------------------------------------------------------------------------
tokXML :: String -> LexState -> String -> Int -> [XMLToken]
{-
tokXML str st rest n 
	| trace message False = error ""
   where
	message = "\n{tokXML " ++ str' ++ " " ++ show st ++ " " ++ rest' ++ " " ++
		show n ++ " = ...}\n"
	str' = concat (map show (take 3 str))
	rest' = concat (map show (take 3 rest))
-}
------------------------------------------------------------------------------
-- First case, the Normal state.
tokXML ('<' :'!':'-':'-':r)
		st@LexN rest n = mkXML st rest (tokXML r LexC [] n) n
tokXML ('<' :r) st@LexN rest n = mkXML st rest (mkSpec '<' n : tokXML r LexT [] n) n
------------------------------------------------------------------------------
-- Second case, the inside Tag state.
tokXML ('>' :r) st@LexT rest n = mkXML st rest (mkSpec '>' n : tokXML r LexN [] n) n
tokXML ('"' :r) st@LexT rest n = mkXML st rest (             tokXML r LexS [] n) n
tokXML (ch :r)  st@LexT rest n | isSpecial
			     = mkXML st rest (mkSpec ch n : tokXML r st [] n) n
    where isSpecial = case ch of
			'=' -> True
			'/' -> True
			'?' -> True
		  	_   -> False
tokXML (ch  :r) st@LexT rest n | isSpace ch
			     = mkXML st rest (tokXML r st [] n') n'
    where
	n' = if ch == '\n' then n+1 else n

------------------------------------------------------------------------------
-- Third case, the inside String state.
tokXML ('"' :r) st@LexS rest n = mkXML st rest (tokXML r LexT [] n) n
------------------------------------------------------------------------------
-- Forth case, the comments
tokXML ('-':'-':'>':r) 
	   	st@LexC rest n = tokXML r LexN [] n
------------------------------------------------------------------------------
-- Default case 1, just carry the char onwards inside the state...
tokXML (ch  :r) st      rest n = tokXML r st (ch:rest) n'
    where
	n' = if ch == '\n' then n+1 else n
-- Default case 2, EOS
tokXML []       st      rest n = mkXML st rest [] n
------------------------------------------------------------------------------

instance Enum XMLToken where
    fromEnum   (XMLSpecialChar _ c) = ord c
    fromEnum   (XMLChars _ _)       = -1
    fromEnum   (XMLName _ _)        = -2
    fromEnum   (XMLString _ _)      = -3 
    toEnum (-1) = XMLString (-1) "string"
    toEnum (-2) = XMLName (-1) "id"
    toEnum (-3) = XMLChars (-1) "...text..." 
    toEnum   c  = XMLSpecialChar (-1) (chr c)

instance Symbol XMLToken where
     char_ c   = XMLSpecialChar (-1) c

instance Show XMLToken where
    showsPrec p   (XMLSpecialChar _ c) = showChar c
    showsPrec p   (XMLChars _ s)       = showString s
    showsPrec p   (XMLName _ s)        = showString s
    showsPrec p   (XMLString _ s)      = shows s

getString (XMLSpecialChar _ c) = [c]
getString (XMLChars _ s)       = s
getString (XMLName _ s)        = s
getString (XMLString _ s)      = s

getLineNumber (XMLSpecialChar ln _) = ln
getLineNumber (XMLChars ln _)       = ln
getLineNumber (XMLName ln _)        = ln
getLineNumber (XMLString ln _)      = ln

aChars  = XMLChars (-1) "text"
aName   = XMLName (-1) "id"
aString = XMLString (-1) "string"

-- ==========================================================================--
-- The first parser							    --
-- ==========================================================================--

data XMLTok2 = XMLTok2StartTag Int String [(String,String)]
	 | XMLTok2EndTag   Int String
	 | XMLTok2Tag      Int String [(String,String)]
	 | XMLTok2Str String
	deriving Show

{-
instance Show XMLTok2 where
    showsPrec p   xml  = showString (fooBar 0 xml)
    showList      xmls = showString (fooBars 0 xmls)

fooBars n xs = concat (map (fooBar n) xs)

fooBar n (XMLStartTag name attr) = ("<" ++ name ++ show attr ++ ">\n" )
fooBar n (XMLEndTag name)        = ("</" ++ name ++ ">\n" )
fooBar n (XMLTag name attr)      = ("<" ++ name ++ show attr ++ "/>\n" )
fooBar n (XMLStr str)            = ("   " ++ [ s | s <- str, s /= '\n'] ++ "\n")
-}

parseElements :: Parsing p => p XMLToken [XMLTok2]
parseElements =
 "elements"	<=> id
		<$> pList parseElement

parseLessThan :: Parsing p => p XMLToken Int
parseLessThan = 
 "element"	<=> getLineNumber
		<$> symbol (char_ '<')

parseElement :: Parsing p => p XMLToken XMLTok2
parseElement = 
	 "element"	<=> XMLTok2StartTag
		<$> parseLessThan  <*> parseName <*> parseAttributes <*-> char '>'

 		<|> XMLTok2EndTag
		<$> parseLessThan  <*-> char '/' <*> parseName <*-> char '>'

		<|> XMLTok2Tag
		<$> parseLessThan <*> parseName <*> parseAttributes <*-> char '/' <*-> char '>' 

		<|> XMLTok2Str
		<$> parseChars
		
parseAttributes :: Parsing p => p XMLToken [(String,String)]
parseAttributes =
 "attributes"	<=> id
		<$> pList parseAttribute

parseAttribute :: Parsing p => p XMLToken (String,String)
parseAttribute =
 "attribute"	<=> (,) 
		<$> parseName <*-> char '=' <*> parseString

		<|> (\ x -> (x,""))
		<$> parseName 



parseChars :: Parsing p => p XMLToken String
parseChars =
 "parse-str"	<=> getString 
		<$> symbol aChars

parseName :: Parsing p => p XMLToken String
parseName =
 "parseName"	<=> getString 
		<$> symbol aName

parseString :: Parsing p => p XMLToken String
parseString = 
 "parseString"	<=> getString 
		<$> symbol aString


-- ==========================================================================--
-- The second parser							    --
-- ==========================================================================--

addIn ::ParseState -> XMLElement -> ParseState
addIn (a,b) obj = (a ++ [obj],b)

addIns ::ParseState -> [XMLElement] -> ParseState
addIns (a,b) objs = (a ++ objs,b)

startAft ::ParseState -> ParseState
startAft (a,b) = ([],a:b)

type Attr = [(String,String)]

type ParseState = ([XMLElement],[[XMLElement]])

mkXMLTag n a r = XMLElement (XMLTag n a r)

type Parse3XML = [(String,[Attr],ParseState)]
parse3XML :: [XMLTok2] -> Parse3XML -> Parse3XML
parse3XML (XMLTok2Str str:xml1) ((name,attr,sofar):rest)
	= parse3XML xml1 ((name,attr,addIn sofar (XMLStr str)) : rest)
parse3XML (XMLTok2StartTag ln name attr:xml1) rest@((context,attrs,sofar):rest')
	= if name == context
	  then -- hey, we've got more of the same.
	       -- rather than plan down 
	       parse3XML xml1 ((name,attr:attrs,startAft sofar):rest')
	  else -- Well, it looks like a *new* tag. Great!
	       parse3XML xml1 ((name,[attr],([],[])):rest)

parse3XML (XMLTok2EndTag ln tagName:xml1) ((name,attrs,(hd,tl)):(name',attrs',sofar'):rest)
	| name == tagName  || tagName == "*"
	= case tl of
	   [] -> -- We have just *one* lot of this specific set,
	         -- so we close it off, and continue.
	         parse3XML xml1 ((name',attrs',
				addIn sofar' (mkXMLTag name (head attrs) hd)):rest)
	   (hdtl:tltl) -> -- We've got several, so we close off just one
		 parse3XML xml1 ((name,tail attrs,addIn (hdtl,tltl) 
						(mkXMLTag name (head attrs) hd))
				:(name',attrs',sofar') : rest)


parse3XML (XMLTok2Tag ln tagName attr:xml1) rest
	= parse3XML (XMLTok2StartTag ln tagName attr
		    :XMLTok2EndTag ln tagName
		    :xml1) rest


parse3XML (XMLTok2EndTag ln tagName:xml1) ((name,attrs,(hd,tl)):(name',attrs',sofar'):rest)
	-- ASSERT: name /= tagName
	-- Now what we do is turn the opening tags into
	-- open/close tags at their respectic locations,
	-- before carrying on.
	= parse3XML (XMLTok2EndTag ln tagName:xml1) ((name',attrs',sofar2):rest)
   where
	sofar2 = foldr addEntry sofar' (zip (hd:tl) attrs)
	addEntry :: ([XMLElement],Attr) -> ParseState -> ParseState
	addEntry (lst,attr) rest = addIns rest ((mkXMLTag name attr []):lst)



parse3XML [] ((name,attrs,(hd,tl)):(name',attrs',sofar'):rest)
	-- ASSERT: name /= tagName
	-- Now what we do is turn the opening tags into
	-- open/close tags at their respectic locations,
	-- before carrying on.
	= parse3XML [] ((name',attrs',sofar2):rest)
   where
	sofar2 = foldr addEntry sofar' (zip (hd:tl) attrs)
	addEntry :: ([XMLElement],Attr) -> ParseState -> ParseState
	addEntry (lst,attr) rest = addIns rest ((mkXMLTag name attr []):lst)


parse3XML []  		                    rest = rest


parse3XML (XMLTok2EndTag ln tagName:xd_) _ =
	error ("Extra endtag: </" ++ tagName ++ "> at line " ++ show ln)

-- error "" -- lst : rest

-- parse3XML (XMLEndTag tagName:xml1) ((name,(hd,tl)):(name',sofar'):rest)

-- addIn sofar' (mkXMLTag name [] hd)):rest)


parse3XML x y = error (show x ++ ":" ++ show y)

{-

------------------------------------------------------------------------------
-- Examples


{-
--sample = "<ul><li> Hello <li> World </li></ul>"
sample = "<ul\n><p></p>\n</ul>"
--sample = "Hello World"

sample2 = tokenizeXML sample 


--sample3 = fst (parse parseElements sample2) 

--sample4 = parse3XML sample3 [("",([],[]))]

sample5 = test (parse parseElements . tokenizeXML)

sample6 = test (reverse . (\ x -> flip parse3XML [("",([],[]))] x) . fst . parse parseElements . tokenizeXML)
-}

------------------------------------------------------------------------------
-}
