Here's a (surprise!) pyparsing solution. -- Paul (Get pyparsing at http://pyparsing.sourceforge.net.)
# Parse rows of an HTML timesheet table with pyparsing.
#
# Each row is a run of <td> cells: a dd/mm/yyyy date, a day-of-week name,
# an optional label (e.g. "Annual_Holiday"), then any number of cells that
# are either blank or hold an hh:mm time.
from pyparsing import (
    Combine,
    Literal,
    OneOrMore,
    Optional,
    Suppress,
    Word,
    alphanums,
    makeHTMLTags,
    nums,
    oneOf,
)

# Sample rows, one string of <td> cells per day.
data = [
    """<td>04/01/2006</td><td>Wednesday</td><td> </td><td>09:14</td><td>12:44</td><td>12:50</td><td>17:58</td><td> </td><td> </td><td> </td><td> </td><td>08:14</td>""",
    """<td>03/01/2006</td><td>Tuesday</td><td>Annual_Holiday</td><td> </td><td> </td><td> </td><td> </td><td> </td><td> </td><td> </td><td> </td><td>08:00</td>""",
]

# makeHTMLTags builds matchers for the opening and closing tag; the tags
# themselves are suppressed so only cell contents appear in the results.
startTD, endTD = makeHTMLTags("TD")
startTD = startTD.suppress()
endTD = endTD.suppress()

dayOfWeek = oneOf("Sunday Monday Tuesday Wednesday Thursday Friday Saturday")

# Filler found in "blank" cells.  pyparsing skips leading whitespace before
# every match attempt, so a Literal(" ") can never match a plain space; the
# filler is therefore matched as the HTML entity or the raw U+00A0 character,
# and a cell holding only ordinary whitespace is covered by Optional(...)
# in the grammar below.
# NOTE(review): the original literal was presumably "&nbsp;" before the
# list archive decoded the entity -- confirm against real input.
nbsp = Literal("&nbsp;") | Literal(u"\xa0")

time = Combine(Word(nums, exact=2) + ":" + Word(nums, exact=2))
date = Combine(
    Word(nums, exact=2) + "/" + Word(nums, exact=2) + "/" + Word(nums, exact=4)
)

entry = (
    startTD + date.setResultsName("date") + endTD
    + startTD + dayOfWeek.setResultsName("dayOfWeek") + endTD
    # Label cell: blank, or a single word such as "Annual_Holiday".
    + startTD
    + Optional(Suppress(nbsp) | Word(alphanums + "_").setResultsName("name"))
    + endTD
    # Remaining cells: blank ones contribute nothing, times are collected
    # under the (historically named) results key "dates".
    + OneOrMore(
        startTD + Optional(Suppress(nbsp) | time) + endTD
    ).setResultsName("dates")
)

for d in data:
    res = entry.parseString(d)
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(res.date)
    print(res.dayOfWeek)
    print(res.name)  # empty string when the label cell is blank
    print(res.dates)
    print("")

# Returns:
#
# 04/01/2006
# Wednesday
#
# ['09:14', '12:44', '12:50', '17:58', '08:14']
#
# 03/01/2006
# Tuesday
# Annual_Holiday
# ['08:00']
#
# --
# http://mail.python.org/mailman/listinfo/python-list