On 2/5/07, Kent Johnson <[EMAIL PROTECTED]> wrote: > You can also do this operation easily with dicts (not tested!):
# Thank you - code now complete and tests passing. Would appreciate
# comments / criticisms. I did wonder if I should create a UrlAnalyser
# class rather than have hanging functions:

#!/usr/bin/python
"""Tiny map / group / reduce pipeline that counts URL accesses.

The pipeline has three stages, each a plain function:

* ``myMap``    - filter (record number, url) pairs and flip them to
                 (url, record number).
* ``myGroup``  - collect the record numbers for each distinct url.
* ``myReduce`` - replace each list of record numbers with its length.

Running the file as a script first runs the unit tests and only runs
``main`` if they all pass.
"""

import unittest


def myMap(data, search):
    """Select the records whose url contains *search* and flip each pair.

    data   -- iterable of (record number, url) tuples.
    search -- substring that must occur in the url for it to be kept.

    Returns a list of (url, record number) tuples in input order.
    """
    return [(url, record_no) for record_no, url in data if search in url]


def myGroup(data):
    """Group record numbers by url.

    data -- iterable of (url, record number) tuples.

    Returns a list of (url, [record numbers]) tuples sorted by url; the
    record numbers for one url keep the order in which they were seen.
    """
    groups = {}
    for url, record_no in data:
        groups.setdefault(url, []).append(record_no)
    # Keys are unique, so sorting the items orders them by url alone.
    return sorted(groups.items())


def myReduce(data):
    """Turn grouped records into occurrence counts.

    data -- iterable of (url, [record numbers]) tuples.

    Returns a list of (url, number of occurrences) tuples in input order.
    """
    return [(url, len(occurrences)) for url, occurrences in data]


class UnitTests(unittest.TestCase):
    """Do not taunt unit tests."""

    def testMapper(self):
        """Produce intermediate (content, record number) pairs.

        Only records whose content contains the search term are kept.
        """
        test_pairs = [(1, 'a'), (2, 'b'), (3, 'c'), (4, 'a'), (5, 'd')]
        intermediate_list = [('a', 1), ('a', 4)]
        self.assertEqual(myMap(test_pairs, "a"), intermediate_list)

    def testGrouper(self):
        """Group occurrences of a record together, sorted by key:

        [('fred', 1), ('jim', 2), ('bill', 3), ('jim', 4)] ->
        [('bill', [3]), ('fred', [1]), ('jim', [2, 4])]
        """
        test_list = [('fred', 1), ('jim', 2), ('bill', 3), ('jim', 4)]
        grouped_list = [('bill', [3]), ('fred', [1]), ('jim', [2, 4])]
        self.assertEqual(myGroup(test_list), grouped_list)

    def testReduce(self):
        """Aggregate grouped results into (value, frequency) pairs."""
        test_intermediate = [('bill', [3]), ('fred', [1]), ('jim', [2, 4])]
        test_summary = [('bill', 1), ('fred', 1), ('jim', 2)]
        self.assertEqual(myReduce(test_intermediate), test_summary)


def doTests():
    """Run our test suite and return the unittest result object."""
    # loadTestsFromTestCase replaces unittest.makeSuite, which is
    # deprecated and no longer present in recent Python releases.
    suite = unittest.TestLoader().loadTestsFromTestCase(UnitTests)
    runner = unittest.TextTestRunner()
    return runner.run(suite)


def main():
    """Main program here: print the access counts for the 'beer' urls."""
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print("Analysing URL data:\n")
    url_data = [
        (1, 'http://www.beer.com'),
        (2, 'http://www.ban-beer.com'),
        (3, 'http://www.bbc.co.uk'),
        (4, 'http://www.beer.com'),
        (5, 'http://wwww.kernel.org'),
    ]
    print(myReduce(myGroup(myMap(url_data, "beer"))))


if __name__ == "__main__":
    result = doTests()
    if result.wasSuccessful():
        main()
    else:
        print("Error - check test output.")

# _______________________________________________
# Tutor maillist - Tutor@python.org
# http://mail.python.org/mailman/listinfo/tutor