At http://people.ubuntu.com/~robertc/baz2.0/plugins/compressbench/trunk
------------------------------------------------------------
revno: 5
revision-id: [email protected]
parent: [email protected]
committer: Robert Collins <[email protected]>
branch nick: trunk
timestamp: Fri 2009-01-23 17:28:54 +1100
message:
  Add git and beta dulwich compressor support.

=== modified file 'bench.py'
--- a/bench.py	2009-01-22 01:08:18 +0000
+++ b/bench.py	2009-01-23 06:28:54 +0000
@@ -94,11 +94,113 @@
         source.close()
 
+from subprocess import Popen, PIPE
+from bzrlib.btree_index import BTreeBuilder
+from bzrlib.versionedfile import FulltextContentFactory, VersionedFiles
+class GitVersionedFiles(VersionedFiles):
+
+    def __init__(self, transport):
+        self._index = BTreeBuilder()
+        self._transport = transport
+        self._path = transport.local_abspath('.')
+        self._run_git(['init'])
+        transport.put_bytes('content', '')
+        self._run_git(['add', 'content'])
+
+    def _run_git(self, params):
+        process = Popen(['git'] + params, cwd=self._path, stdin=PIPE,
+            stdout=PIPE, stderr=PIPE)
+        out, err = process.communicate()
+        rc = process.returncode
+        assert rc == 0, 'bad result %d %r %r' % (rc, out, err)
+        return out, err
+
+    def get_record_stream(self, keys, ordering, include_delta_closure):
+        for key in keys:
+            blob_id = self._index.iter_entries([key]).next()[2]
+            content, _ = self._run_git(['cat-file', '-p', blob_id])
+            yield FulltextContentFactory(key, (), None, content)
+
+    def insert_record_stream(self, stream):
+        for record in stream:
+            self._transport.put_bytes('content', record.get_bytes_as('fulltext'))
+            # Save the content
+            out,_ = self._run_git(['hash-object', 'content'])
+            blob_id = out[:-1]
+            # record a tree so pack will work as normal with ordering etc.
+            # NB: note that this is double-hashing the file content probably.
+            self._run_git(['commit', '-m', 'foo', 'content'])
+            self._index.add_node(record.key, blob_id)
+
+    def keys(self):
+        result = set()
+        for node in self._index.iter_all_entries():
+            result.add(node[1])
+        return result
+
+    def _pack(self):
+        self._run_git(['repack', '-a', '-d', '--depth=200', '--window=200'])
+
+    def _pack_name(self):
+        paths = self._transport.list_dir('.git/objects/pack')
+        for path in paths:
+            if path.endswith('.pack'):
+                return '.git/objects/pack/' + path
+
+    def size(self):
+        return self._transport.stat(self._pack_name()).st_size
+
+
+class GitWriterThunk(object):
+
+    def __init__(self, vf):
+        self.vf = vf
+
+    def close(self):
+        pass
+
+    def end(self):
+        self.vf._pack()
+
+
+def make_git_factory(delta, graph, keylength):
+    """Make a VF factory based on invoking a git commit for each step."""
+    def factory(transport):
+        result = GitVersionedFiles(transport)
+        result.writer = GitWriterThunk(result)
+        result.stream = result.writer
+        return result
+    return factory
+
+
+def make_dulwich_factory(delta, graph, keylength):
+    """Make a dulwich-using git backed VF."""
+    from dulwich.pack import Pack
+    class DulwichVersionedFiles(GitVersionedFiles):
+        """A dulwich using VF implementation."""
+        def __init__(self, transport):
+            GitVersionedFiles.__init__(self, transport)
+            self._pack_obj = None
+        def get_record_stream(self, keys, ordering, include_delta_closure):
+            if self._pack_obj is None:
+                self._pack_obj = Pack(self._transport.local_abspath(self._pack_name()[:-5]))
+            for key in keys:
+                blob_id = self._index.iter_entries([key]).next()[2]
+                content = self._pack_obj.get_raw(blob_id)
+                yield FulltextContentFactory(key, (), None, content)
+
+    def factory(transport):
+        result = DulwichVersionedFiles(transport)
+        result.writer = GitWriterThunk(result)
+        result.stream = result.writer
+        return result
+    return factory
+
 class cmd_compressbench(Command):
     """Benchmark compression performance."""
 
     takes_options = [
-        ListOption('delta', type=str, help='Compressors to test. (gc, knit)'),
+        ListOption('delta', type=str, help='Compressors to test. (gc, knit, git, dulwich)'),
         Option('limit', type=int, help='Cap the corpus at limit bytes'),
         ]
 
@@ -122,14 +224,19 @@
             factory.parents = parents
             parents = (factory.key,)
         # Now for each compression type benchmark it:
-        factories = {'knit':make_pack_factory, 'gc':make_gc_factory}
+        factories = {'knit':make_pack_factory, 'gc':make_gc_factory,
+            'git':make_git_factory, 'dulwich':make_dulwich_factory}
         for label in delta:
            factory = factories[label]
            print "Testing", label, "compression"
            vf, vf_t, vf_cleanup = self.test_compressor(stream, factory)
            try:
                # decompress
-               print "Compressed size", vf_t.stat('newpack').st_size
+               try:
+                   size = vf.size()
+               except AttributeError:
+                   size = vf_t.stat('newpack').st_size
+               print "Compressed size", size
                self.test_decompress(vf)
            finally:
                vf_cleanup()

-- 
bazaar-commits mailing list
[email protected]
https://lists.ubuntu.com/mailman/listinfo/bazaar-commits
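
For anyone who wants to poke at the new code by hand, a minimal round-trip sketch (not part of the patch) could look like the following. It assumes git is on $PATH with a user identity configured, that GitVersionedFiles is importable from the plugin's bench module, and the scratch path and record key are made up for illustration:

    import os
    from bzrlib.transport import get_transport
    from bzrlib.versionedfile import FulltextContentFactory
    from bench import GitVersionedFiles  # assumed import path for the plugin module

    scratch = '/tmp/compressbench-scratch'  # hypothetical scratch directory
    os.mkdir(scratch)
    vf = GitVersionedFiles(get_transport(scratch))

    # Store one fulltext; this shells out to git hash-object and git commit.
    key = ('rev-1',)
    vf.insert_record_stream(
        [FulltextContentFactory(key, (), None, 'hello world\n')])

    vf._pack()                          # what GitWriterThunk.end() triggers
    print "Compressed size", vf.size()  # size of the repacked .pack file
    for record in vf.get_record_stream([key], 'unordered', False):
        print record.get_bytes_as('fulltext')

The git and dulwich compressors share everything up to the read side; the dulwich variant only swaps get_record_stream to pull raw objects straight out of the repacked pack via dulwich's Pack class instead of shelling out to git cat-file.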
