-----BEGIN PGP SIGNED MESSAGE----- Hash: SHA1
Hi everyone, I read the source, ran numerous tests, but SGMLParser keeps returning *tag* data from previous parser instances. I'm totally confused as to why. The content data it returns is OK. E.g.:: sp = MyParser() sp.feed('<test><t />Test</test>') print sp.content, sp.markup sp.close() sp = MyParser() sp.feed('<xml>\n</xml>\r\n') print sp.content, sp.markup sp.close() gives:: ('Test', [{'t': ({}, (0, 0))}, {'test': ({}, (0, 4))}]) ('\n\r\n', [{'t': ({}, (0, 0))}, {'test': ({}, (0, 4))}, {'xml': ({}, (0, 1))}]) It keeps the tags from the previous session, while I'm sure the stack etc. should be clean. Any ideas? Regards, Berend - ---- import sgmllib class MyParser(sgmllib.SGMLParser): content = '' markup = [] span_stack = [] def handle_data(self, data): self.content += data def unknown_starttag(self, tag, attr): stack = { tag: ( dict(attr), ( len(self.content), ) ) } self.span_stack.append(stack) def unknown_endtag(self, tag): prev_tag, ( attr, ( offset, ) ) = self.span_stack.pop().items()[0] if tag: # close all tags on stack until it finds a matching end tag # XXX: need to return to LEVEL, not same tag name while tag != prev_tag: span = { prev_tag: ( attr, ( offset, 0 ) ) } self.markup.append( span ) prev_tag, ( attr, ( offset, ) ) = self.span_stack.pop().items()[0] length = len( self.content ) - offset span = { tag: ( attr, ( offset, length ) ) } self.markup.append( span ) def do_unknown_tag(self, tag, attr): assert not tag and not attr, "do_unknown_tag %s, %s" % (tag, attr) def close(self): sgmllib.SGMLParser.close(self) self.content = '' self.markup = [] self.span_stack = [] def parse_data(data): sp = MyParser() sp.feed(data) r = sp.content, sp.markup sp.close() return r print parse_data('<test><t />Test</test>') print parse_data('<xml>\n</xml>\r\n') print parse_data('<sgml><s>Test 3</s></sgml>') - -- web, http://dotmpe.com () ASCII Ribbon email, berend.van.ber...@gmail.com /\ icq, 26727647; irc, berend/mpe at irc.oftc.net -----BEGIN PGP SIGNATURE----- Version: 
GnuPG v1.4.6 (GNU/Linux) iD8DBQFJlXxrn70fkTNDJRgRArWwAKCbhe/FwOu3/XtAja7+rbvIv29HEQCgwtf3 k3eiwfD0yw6t+giXJy1nako= =afE6 -----END PGP SIGNATURE----- -- http://mail.python.org/mailman/listinfo/python-list