from xml.dom.ext.reader import Sax2

class DataNode:
    """
    Parse an XML DOM Element into a tree of dictionaries, lists and
    Unicode strings, stored in the object's data attribute.

    Every child tag name becomes a dictionary key. When the same tag
    occurs more than once under one parent, the key maps to a list holding
    the values in document order.
    Tag attributes are stored in the same dictionary as the child tags,
    where the key for the attribute foo is u'@foo'.
    A tag containing non-blank text is represented by that text alone,
    stripped of surrounding whitespace: its attributes and child tags are
    dropped, and if there are several text chunks the last one wins.
    An empty tag (such as <tag/>) is represented by the dictionary of its
    attributes, which is empty when it has none.
    Comments, processing instructions and whitespace between tags are
    ignored.
    The root tag's own name isn't preserved.
    """
    def __init__(self, rootElement):
        # Attributes and child tags are gathered here; text is tracked
        # separately so the outcome does not depend on the order in which
        # text and tags appear among the children.
        entries = {}
        text = None

        for attribute in rootElement.attributes.values():
            # First, record the attributes under their u'@name' keys
            entries[u'@%s' % attribute.name] = attribute.value

        for child in rootElement.childNodes:
            # Dispatch on the DOM node type rather than on hasChildNodes(),
            # so that empty tags (no children, nodeValue of None) are
            # handled as tags instead of crashing on None.strip(), and
            # comments are not mistaken for text.
            nodeType = child.nodeType

            if nodeType == child.ELEMENT_NODE:
                # A tag: its name is the key, its parsed content the value
                key = child.tagName
                value = DataNode(child).data
                if key not in entries:
                    entries[key] = value
                elif isinstance(entries[key], list):
                    entries[key].append(value)
                else:
                    # Second occurrence of this tag: promote to a list
                    entries[key] = [entries[key], value]

            elif nodeType in (child.TEXT_NODE, child.CDATA_SECTION_NODE):
                stripped = child.nodeValue.strip()
                if stripped:
                    # Blank text is just whitespace between tags
                    text = stripped

            # Any other node type (comment, processing instruction, ...)
            # carries no data and is skipped.

        if text is None:
            self.data = entries
        else:
            # Text takes precedence over attributes and child tags
            self.data = text


def main():
    """
    Command-line entry point: parse the XML file named by the single
    command-line argument and print the resulting data tree.

    Prints a usage message and returns without parsing when the number of
    arguments is wrong.
    """
    import sys
    if len(sys.argv) != 2:
        # A single parenthesised argument prints the same text whether
        # print is a statement or a function.
        print("Usage: python %s <fileName>" % __file__)
        return
    fileName = sys.argv[-1]
    stream = open(fileName)
    try:
        document = Sax2.Reader().fromStream(stream)
    finally:
        # Close the file even when parsing fails, instead of leaking the
        # handle until garbage collection.
        stream.close()
    root = DataNode(document.documentElement)
    print(root.data)

# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()