At http://bzr.arbash-meinel.com/plugins/groupcompress_rabin

------------------------------------------------------------
revno: 58
revision-id: [email protected]
parent: [email protected]
committer: John Arbash Meinel <[email protected]>
branch nick: groupcompress_rabin
timestamp: Fri 2009-02-27 14:18:47 -0600
message:
  Allow the source bytes to be longer than the delta header expects.
  This makes a huge difference for extraction speed:
  10s versus 45s, and versus 17s for the original groupcompress code.
  
  
  Also, the compiled version in _groupcompress_c seems to be roughly the same speed as
  the patch-delta.c version.
  At the very least, the extra memory copy overhead negates any benefit.
=== modified file '_groupcompress_c.pyx'
--- a/_groupcompress_c.pyx      2009-02-27 18:21:04 +0000
+++ b/_groupcompress_c.pyx      2009-02-27 20:18:47 +0000
@@ -144,9 +144,10 @@
     # make sure the orig file size matches what we expect
     # XXX: gcc warns because data isn't defined as 'const'
     size = get_delta_hdr_size(&data, top)
-    if (size != source_size):
+    if (size > source_size):
         # XXX: mismatched source size
         return None
+    source_size = size
 
     # now the result size
     size = get_delta_hdr_size(&data, top)

=== modified file 'groupcompress.py'
--- a/groupcompress.py  2009-02-27 19:54:27 +0000
+++ b/groupcompress.py  2009-02-27 20:18:47 +0000
@@ -475,8 +475,11 @@
                 else:
                     # TODO: relax apply_delta so that it can allow source to be
                     #       longer than expected
-                    chunks = [_groupcompress_c.apply_delta(
-                                plain[0:index_memo[3]], delta)]
+                    bytes = _groupcompress_c.apply_delta(plain, delta)
+                    if bytes is None:
+                        import pdb; pdb.set_trace()
+                    chunks = [bytes]
+                    del bytes
                 if sha_strings(chunks) != sha1:
                     raise AssertionError('sha1 sum did not match')
             yield ChunkedContentFactory(key, parents, sha1, chunks)

=== modified file 'patch-delta.c'
--- a/patch-delta.c     2009-02-27 17:32:04 +0000
+++ b/patch-delta.c     2009-02-27 20:18:47 +0000
@@ -27,8 +27,11 @@
 
        /* make sure the orig file size matches what we expect */
        size = get_delta_hdr_size(&data, top);
-       if (size != src_size)
+       /* MOD: We allow a bigger source, assuming we only compressed
+          against the first bytes. */
+       if (size > src_size)
                return NULL;
+       src_size = size;
 
        /* now the result size */
        size = get_delta_hdr_size(&data, top);

-- 
bazaar-commits mailing list
[email protected]
https://lists.ubuntu.com/mailman/listinfo/bazaar-commits

Reply via email to