On Fri, 09 Jan 2009 04:04:41 +0100, Johannes Bauer wrote:

> As this was horribly slow (20 Minutes for a 2GB file) I coded the whole
> thing in C also:
# Yours took ~37 minutes for 2 GiB here.  This "just" ~15 minutes:

#!/usr/bin/env python
"""Visualise a file as a greyscale picture of its most frequent byte values.

The input file is cut into ``width * height`` equally sized blocks; the most
frequent byte value of each block becomes one pixel of an ASCII ("P2") PGM
image that is written next to the input as ``<input>.pgm``.

The code runs unchanged on Python 2.6+ and Python 3: ``print`` is always
called with one parenthesised argument, the progress percentage uses float
arithmetic explicitly, and blocks are iterated as ``bytearray`` so that byte
values are integers on both versions.
"""
import os
import sys
from collections import defaultdict
from functools import partial
from itertools import islice


def iter_max_values(blocks, block_count):
    """Yield the most frequent byte value of each block as an int (0-255).

    :param blocks: iterable of non-empty byte strings.
    :param block_count: expected number of blocks; it only scales the
        progress output and must not be zero.

    When several byte values are equally frequent, the highest value wins.
    A progress line is printed for every 1024th block.
    """
    for i, block in enumerate(blocks):
        histogram = defaultdict(int)
        for byte in bytearray(block):
            histogram[byte] += 1
        # Compare (count, value) pairs so max() picks the highest count.  The
        # pair must be built from the histogram entry itself, not from the
        # `byte` variable left over from the counting loop above (which would
        # always report the last byte of the block).
        yield max((count, value) for value, count in histogram.items())[1]
        if i % 1024 == 0:
            print('Progress: %.1f%%' % (100.0 * i / block_count))


def write_pgm(filename, width, height, pixel_values):
    """Write `pixel_values` as an ASCII PGM image with a maximum of 255.

    :param filename: path of the image file to create or overwrite.
    :param width: image width written into the header.
    :param height: image height written into the header.
    :param pixel_values: iterable of integers, written one per line.
    """
    with open(filename, 'w') as pgm_file:
        pgm_file.write('P2\n'
                       '# CREATOR: Crappyass Python Script\n'
                       '%d %d\n'
                       '255\n' % (width, height))
        pgm_file.writelines('%d\n' % value for value in pixel_values)


def main(filename=None, width=1024, height=1024):
    """Convert `filename` into ``filename + '.pgm'``.

    :param filename: input file; taken from ``sys.argv[1]`` when omitted.
    :param width: image width in pixels.
    :param height: image height in pixels.

    Exits with a message when no file name is available or when the file
    holds fewer than ``width * height`` bytes (less than one byte per pixel).
    """
    if filename is None:
        if len(sys.argv) != 2:
            sys.exit('Usage: %s FILE' % sys.argv[0])
        filename = sys.argv[1]

    filesize = os.path.getsize(filename)
    pixels = width * height
    blocksize = filesize // pixels

    print('Filesize : %d' % filesize)
    print('Image size : %dx%d' % (width, height))
    print('Bytes per Pixel: %d' % blocksize)

    if blocksize == 0:
        # read(0) returns an empty string at once, which would end the block
        # iterator immediately and leave a header-only, invalid image.
        sys.exit('%s is too small: need at least %d bytes' % (filename, pixels))

    with open(filename, 'rb') as data_file:
        # Cap the number of blocks at the pixel count: when the file size is
        # not a multiple of `pixels`, the leftover bytes would otherwise be
        # emitted as extra pixel values beyond what the header declares.
        blocks = islice(iter(partial(data_file.read, blocksize), b''), pixels)
        write_pgm(filename + '.pgm', width, height,
                  iter_max_values(blocks, pixels))


if __name__ == '__main__':
    main()

# Ciao,
#     Marc 'BlackJack' Rintsch
# --
# http://mail.python.org/mailman/listinfo/python-list