# George Sakkis wrote:
# > Michael Spencer wrote:
# >
# >> Here's a small update to the generator that allows optional handling of the
# >> head and the tail:
# >>
# >> def chunker(s, chunk_size=3, sentry=".", keep_first = False, keep_last = False):
# >>     buffer=[]
# ...
# >
# > And here's a (probably) more efficient version, using a deque as a
# > buffer:
# >
#
# Perhaps the deque-based solution is more efficient under some conditions, but
# it's significantly slower for all the cases I tested:
#
# Here are some typical results:
#
# Using George's deque buffer:
#   >>> time_chunkers(chunkerGS, groups=1000, words_per_group=1000, chunk_size=300)
#   'get_chunks(...) 30 iterations, 16.70msec per call'
#   >>> time_chunkers(chunkerGS, groups=1000, words_per_group=1000, chunk_size=30)
#   'get_chunks(...) 35 iterations, 14.56msec per call'
#   >>> time_chunkers(chunkerGS, groups=1000, words_per_group=1000, chunk_size=3)
#   'get_chunks(...) 35 iterations, 14.41msec per call'
#
# Using the list buffer
#   >>> time_chunkers(chunker, groups=1000, words_per_group=1000, chunk_size=300)
#   'get_chunks(...) 85 iterations, 5.88msec per call'
#   >>> time_chunkers(chunker, groups=1000, words_per_group=1000, chunk_size=30)
#   'get_chunks(...) 85 iterations, 5.89msec per call'
#   >>> time_chunkers(chunker, groups=1000, words_per_group=1000, chunk_size=3)
#   'get_chunks(...) 83 iterations, 6.03msec per call'
#   >>>
#
# NOTE(review): the figures above were produced by the harness as originally
# posted, which called make_seq(groups) and chunk_func(seq) and therefore
# ignored both words_per_group and chunk_size.  That is why the timings barely
# change between chunk sizes.  time_chunkers() below now forwards both values,
# so the numbers should be re-measured before drawing conclusions.
#
# Test functions follow:

# Minimum accumulated measured time (in seconds) that timefunc() collects
# before it stops repeating the call and reports an average.
MIN_TIMING_SECONDS = 0.5


def make_seq(groups=1000, words_per_group=3, word_length=76, sentry="."):
    """Make a sequence of test input for chunker.

    The result is a flat list made of ``groups`` repetitions of
    ``words_per_group`` identical words (``"W" * word_length``) followed by
    one ``sentry`` item.

    >>> make_seq(groups = 5, words_per_group=5, word_length = 2, sentry="%")
    ... # doctest: +NORMALIZE_WHITESPACE
    ['WW', 'WW', 'WW', 'WW', 'WW', '%', 'WW', 'WW', 'WW', 'WW', 'WW', '%',
     'WW', 'WW', 'WW', 'WW', 'WW', '%', 'WW', 'WW', 'WW', 'WW', 'WW', '%',
     'WW', 'WW', 'WW', 'WW', 'WW', '%']
    """
    one_group = ["W" * word_length] * words_per_group
    one_group.append(sentry)
    return one_group * groups


def time_chunkers(chunk_func, groups=1000, words_per_group=10, chunk_size=3):
    """Test harness for chunker functions.

    Builds an input with ``make_seq(groups, words_per_group)`` and times how
    long it takes to exhaust ``chunk_func(seq, chunk_size=chunk_size)`` into a
    list.

    chunk_func must accept the sequence as its first argument and a
    ``chunk_size`` keyword argument (as the ``chunker`` signature quoted at
    the top of this file does).

    Returns the report string produced by ``timefunc``.
    """
    # Forward words_per_group: previously only `groups` reached make_seq, so
    # every run silently used make_seq's default of 3 words per group.
    seq = make_seq(groups, words_per_group)

    # Kept as a named inner function because timefunc() reports
    # func.__name__, and the report is expected to read 'get_chunks(...)'.
    def get_chunks(chunk_func, seq):
        # Forward chunk_size: previously the chunker always ran with its own
        # default, so the chunk_size argument of the harness had no effect.
        return list(chunk_func(seq, chunk_size=chunk_size))

    return timefunc(get_chunks, chunk_func, seq)


def _get_timer():
    """Return a zero-argument callable giving the current time in seconds.

    Prefers ``time.perf_counter`` when the running interpreter provides it.
    Otherwise falls back to the original choice: ``time.clock`` on win32 and
    ``time.time`` elsewhere.
    """
    import time

    # time.clock no longer exists on current Python versions, so look for
    # perf_counter first and only touch time.clock on interpreters without it.
    perf_counter = getattr(time, "perf_counter", None)
    if perf_counter is not None:
        return perf_counter

    import sys

    if sys.platform == "win32":
        return time.clock
    return time.time


def timefunc(func, *args, **kwds):
    """Time repeated calls of ``func(*args, **kwds)`` and describe the result.

    The call is repeated until the accumulated measured time reaches
    ``MIN_TIMING_SECONDS``; at least one call is always made.  Only the time
    spent inside ``func`` is accumulated.

    Returns a string of the form
    ``"<func name>(...) <count> iterations, <time><unit> per call"`` where the
    unit is ``usec`` when more than 1000 iterations were made and ``msec``
    otherwise.
    """
    timer = _get_timer()
    count, totaltime = 0, 0
    while True:
        t1 = timer()
        func(*args, **kwds)
        t2 = timer()
        totaltime += t2 - t1
        count += 1
        # Test after the call so that count is never zero when dividing below.
        if totaltime >= MIN_TIMING_SECONDS:
            break
    if count > 1000:
        unit = "usec"
        timeper = totaltime * 1000000 / count
    else:
        unit = "msec"
        timeper = totaltime * 1000 / count
    return "%s(...) %s iterations, %.2f%s per call" % (
        func.__name__, count, timeper, unit)

# --
# http://mail.python.org/mailman/listinfo/python-list