HTML parser...
    
    
    import htmlparser, strtabs, strutils, xmltree
    
    type
      NodeKind = enum
        # Discriminator of `Node`: plain character data or a tag.
        textNode, elementNode

      Node = object
        # One node of the parsed HTML tree, modelled as an object variant.
        # The discriminator is declared exactly once, in the `case` header;
        # declaring `kind` as a separate field as well is a compile error.
        case kind: NodeKind
        of textNode:
          text: string                  # character data of the text node
        of elementNode:
          tag: string                   # tag name
          attrs: seq[(string, string)]  # (name, value) attribute pairs
          children: seq[Node]           # nested nodes, in document order
    
    proc appendConverted(dest: var seq[Node], x: XmlNode) =
      ## Appends the `Node` equivalent of the `xmltree` node `x` to `dest`.
      ##
      ## Whitespace-only text is skipped, and so is every node kind other
      ## than text and element (comments, CDATA, entities, ...).
      case x.kind
      of xnText:
        if not x.text.isEmptyOrWhitespace:
          dest.add Node(kind: textNode, text: x.text)
      of xnElement:
        var attrList: seq[(string, string)] = @[]
        # `attrs` is nil for an element that carries no attributes.
        # NOTE(review): pairs come back in string-table order, which is not
        # guaranteed to be document order.
        if x.attrs != nil:
          for name, value in x.attrs.pairs:
            attrList.add((name.toLowerAscii, value))
        var kids: seq[Node] = @[]
        for child in x:
          kids.appendConverted(child)
        dest.add Node(kind: elementNode, tag: x.tag.toLowerAscii,
                      attrs: attrList, children: kids)
      else:
        discard # ignore other node types

    proc parseHTML(html: string): Node =
      ## Parses `html` and returns its first top-level element or
      ## non-blank text node as a `Node` tree.
      ##
      ## Tag and attribute names are lower-cased; attribute values and text
      ## are kept as delivered by `htmlparser`.
      ##
      ## Raises `ValueError` when the input has no element or non-blank text.
      # The call is module-qualified on purpose: identifiers are
      # style-insensitive, so an unqualified `parseHtml(html)` would resolve
      # to this very proc and recurse forever.
      let doc = htmlparser.parseHtml(html)
      var topLevel: seq[Node] = @[]
      # `htmlparser` appears to wrap the input in a synthetic `document`
      # element unless exactly one top-level node was parsed; unwrap it so
      # the first real node is returned.
      # NOTE(review): a genuine single root element named `document` is
      # indistinguishable from that wrapper and is unwrapped as well.
      if doc.kind == xnElement and doc.tag == "document":
        for child in doc:
          topLevel.appendConverted(child)
      else:
        topLevel.appendConverted(doc)
      if topLevel.len == 0:
        raise newException(ValueError,
          "HTML input contains no element or text node")
      topLevel[0]
    
    
    Run

Reply via email to