# HTML parser: builds a simple Node tree from an HTML string.
import htmlparser
import strtabs, strutils, xmltree
type
  NodeKind = enum ## Discriminates the two shapes a `Node` can take.
    textNode,     ## a run of character data
    elementNode   ## a tag with attributes and child nodes

  Node = object ## One node of the parsed HTML tree (value type).
    # The discriminator is declared once, in the `case` header itself; a
    # separate `kind` field in front of the `case` is not valid Nim.
    case kind: NodeKind
    of textNode:
      text: string                  ## character data of the text node
    of elementNode:
      tag: string                   ## element name
      attrs: seq[(string, string)]  ## (name, value) attribute pairs
      children: seq[Node]           ## child nodes
proc appendAsNode(dest: var seq[Node], src: XmlNode) =
  ## Converts `src` and, recursively, its subtree to a `Node` and appends it
  ## to `dest`. Whitespace-only text and every node kind other than text or
  ## element (comments, CDATA, ...) are dropped.
  case src.kind
  of xnText:
    let content = src.text
    # Skip formatting whitespace between tags.
    if content.strip().len > 0:
      dest.add Node(kind: textNode, text: content)
  of xnElement:
    var attrList: seq[(string, string)] = @[]
    # `attrs` is nil for elements that carry no attributes.
    if src.attrs != nil:
      for name, value in src.attrs.pairs:
        attrList.add((name.toLowerAscii, value))
    var element = Node(kind: elementNode, tag: src.tag.toLowerAscii,
                       attrs: attrList, children: @[])
    for child in src:
      element.children.appendAsNode(child)
    # Append only once the subtree is complete: `Node` is a value type, so a
    # copy stored earlier would never see children added afterwards.
    dest.add element
  else:
    discard # ignore other node types

proc parseHTML(html: string): Node =
  ## Parses `html` and returns its first top-level node (element or
  ## non-blank text) as a `Node` tree.
  ##
  ## Tag and attribute names are lower-cased. Whitespace-only text, comments
  ## and other non-text, non-element nodes are skipped. Attributes are read
  ## from the parser's string table, so their order is not guaranteed to be
  ## document order.
  ##
  ## Raises `ValueError` when `html` contains no element or non-blank text.
  # Qualified on purpose: identifiers are case-insensitive after the first
  # character, so an unqualified `parseHtml(html)` would also match this proc.
  let doc = htmlparser.parseHtml(html)
  var topLevel: seq[Node] = @[]
  # The std parser wraps anything other than exactly one top-level node in a
  # synthetic `document` element; unwrap it to reach the real top level.
  # NOTE(review): a genuine top-level `<document>` tag is indistinguishable
  # from that wrapper and is unwrapped as well.
  if doc.kind == xnElement and doc.tag == "document":
    for child in doc:
      topLevel.appendAsNode(child)
  else:
    topLevel.appendAsNode(doc)
  if topLevel.len == 0:
    raise newException(ValueError,
      "parseHTML: input contains no element or text")
  result = topLevel[0]
Run