Whoops! Forgot an executable example ;).
Attached, and also available at:
http://issola.caltech.edu/~t/transfer/test-enc.py
http://issola.caltech.edu/~t/transfer/test-enc.html
Run 'python test-enc.py test-enc.html' and note that htmllib.HTMLParser-based parsers give different output than HTMLParser.HTMLParser-based parsers.
cheers, --titus
#!/usr/bin/env python2.4

import formatter
import htmllib
import HTMLParser
### A simple mix-in to demonstrate the problem.
class MixinTest:
    """Mix-in that reports OPTION start tags and entity references.

    It is meant to be combined with a parser base class that supplies
    ``handle_data``.
    """

    # Definition of entities -- derived classes may override
    entitydefs = {
        'lt': '<',
        'gt': '>',
        'amp': '&',
        'quot': '"',
        'apos': '\'',
    }

    def start_option(self, attrs):
        """Print the attributes received for an OPTION start tag."""
        print('==> OPTION starting %s' % (attrs,))

    def handle_entityref(self, name):
        """Print the entity name, then forward its replacement text.

        A name found in ``entitydefs`` is passed to ``handle_data`` as its
        replacement text.  Any other name is handed to ``unknown_entityref``
        when the parser base class provides that hook, and is ignored
        otherwise.
        """
        print('==> HANDLING ENTITY %s' % (name,))
        table = self.entitydefs
        if name in table:
            self.handle_data(table[name])
            return
        # Not every parser base class defines unknown_entityref, so look it
        # up instead of assuming it exists; calling it unconditionally would
        # raise AttributeError on a base class without the hook.
        unknown = getattr(self, 'unknown_entityref', None)
        if unknown is not None:
            unknown(name)
####
class htmllib_Parser(MixinTest, htmllib.HTMLParser):
    """MixinTest combined with the htmllib.HTMLParser base class."""

    def __init__(self):
        # The htmllib.HTMLParser constructor is given a formatter; a
        # NullFormatter instance is what gets passed here.
        null_formatter = formatter.NullFormatter()
        htmllib.HTMLParser.__init__(self, null_formatter)
class nonhtmllib_Parser(MixinTest, HTMLParser.HTMLParser):
    """MixinTest combined with the HTMLParser.HTMLParser base class."""

    def handle_starttag(self, name, attrs):
        """Forward OPTION start tags to start_option; ignore all others."""
        if name != 'option':
            return
        self.start_option(attrs)
###
import sys

# The HTML file to parse is taken from the first command-line argument.
if len(sys.argv) < 2:
    sys.exit('usage: python test-enc.py test-enc.html')

# Read the whole input up front.  try/finally closes the file even if
# read() fails, instead of leaving the handle open.
infile = open(sys.argv[1])
try:
    data = infile.read()
finally:
    infile.close()

# Feed the same data to both parser flavours so their output can be compared.
print('PARSING with htmllib.HTMLParser')
htmllib_p = htmllib_Parser()
htmllib_p.feed(data)

print('\nPARSING with HTMLParser.HTMLParser')
nonhtmllib_p = nonhtmllib_Parser()
nonhtmllib_p.feed(data)
Size of pizza (measured in "):
Small (6)
Medium (10)
Large (14)
-- http://mail.python.org/mailman/listinfo/python-list
