"""Print the link targets and the anchor text found in an HTML page.

This is a Python 2 script: it relies on the ``htmllib`` module and on
``urllib.urlopen``.
"""

import formatter
import htmllib
import urllib

# Page that is fetched and scanned when the script is run directly.
URL = 'http://www.aquabase.org/fish/dump.php3'


class x(htmllib.HTMLParser):
    """HTML parser that prints each anchor's link attributes and inner text."""

    # True only while the parser is between <a ...> and </a>.  It has to
    # start out False, otherwise any text that precedes the first anchor
    # would be printed as if it were anchor text.
    inanchor = False

    def dump(self, tag, attrs):
        """Print the link-like attribute values of one tag on a single line.

        tag   -- name of the element being reported; not used in the output,
                 kept so every handler can call dump() the same way
        attrs -- list of (name, value) pairs as handed to the start_*/do_*
                 handlers

        Only attributes named 'a', 'src' or 'href' are reported.  A line is
        printed even when no attribute matches.
        """
        values = [v for a, v in attrs if a in ('a', 'src', 'href')]
        # A parenthesised single argument prints identically under the
        # Python 2 print statement and stays valid syntax on Python 3.
        print(' '.join(values))

    # To report other elements as well, add handlers in the same style, e.g.
    #   def do_img(self, attrs): self.dump('img', attrs)
    #   def start_form(self, attrs): self.dump('form', attrs)

    def start_a(self, attrs):
        """Handle <a ...>: print its link attributes, start capturing text."""
        self.dump('a', attrs)
        self.inanchor = True

    def end_a(self):
        """Handle </a>: stop capturing text."""
        self.inanchor = False

    def handle_data(self, data):
        """Print text that occurs inside an anchor element; ignore the rest.

        The flag is cleared in end_a() rather than here, because the parser
        may deliver one anchor's text in several chunks (for example around
        entities or nested inline tags) and all of them belong to the anchor.
        """
        if self.inanchor:
            print(data)


def main():
    """Fetch URL and print the anchors it contains."""
    page = urllib.urlopen(URL)
    try:
        html = page.read()
    finally:
        # Release the connection even if reading fails.
        page.close()

    parser = x(formatter.NullFormatter())
    parser.feed(html)
    parser.close()


if __name__ == '__main__':
    main()

# Originally posted to python-list:
# http://mail.python.org/mailman/listinfo/python-list