At http://bazaar.launchpad.net/%7Ebzr/bzr-groupcompress/rabin

------------------------------------------------------------
revno: 81
revision-id: [email protected]
parent: [email protected]
committer: John Arbash Meinel <[email protected]>
branch nick: rabin
timestamp: Mon 2009-03-02 16:38:28 -0600
message:
  Fix a bug when handling multiple large-range copies.
  
  We were adjusting moff multiple times, without adjusting it back.
=== modified file '_groupcompress_pyx.pyx'
--- a/_groupcompress_pyx.pyx    2009-03-02 21:02:23 +0000
+++ b/_groupcompress_pyx.pyx    2009-03-02 22:38:28 +0000
@@ -249,7 +249,8 @@
     # handling, and to avoid double allocating memory
     if (delta_size < DELTA_SIZE_MIN):
         # XXX: Invalid delta block
-        return None
+        raise RuntimeError('delta_size %d smaller than min delta size %d'
+                           % (delta_size, DELTA_SIZE_MIN))
 
     data = <unsigned char *>delta
     top = data + delta_size
@@ -259,7 +260,8 @@
     size = get_delta_hdr_size(&data, top)
     if (size > source_size):
         # XXX: mismatched source size
-        return None
+        raise RuntimeError('source size %d < expected source size %d'
+                           % (source_size, size))
     source_size = size
 
     # now the result size
@@ -302,13 +304,17 @@
             if (cp_off + cp_size < cp_size or
                 cp_off + cp_size > source_size or
                 cp_size > size):
-                break
+                raise RuntimeError('Something wrong with:'
+                    ' cp_off = %s, cp_size = %s'
+                    ' source_size = %s, size = %s'
+                    % (cp_off, cp_size, source_size, size))
             memcpy(out, source + cp_off, cp_size)
             out = out + cp_size
             size = size - cp_size
         elif (cmd):
             if (cmd > size):
-                break
+                raise RuntimeError('Insert instruction longer than remaining'
+                    ' bytes: %d > %d' % (cmd, size))
             memcpy(out, data, cmd)
             out = out + cmd
             data = data + cmd
@@ -320,11 +326,14 @@
             #  * encountering them (might be data corruption).
             #  */
             ## /* XXX: error("unexpected delta opcode 0"); */
-            return None
+            raise RuntimeError('Got delta opcode: 0, not supported')
 
     # /* sanity check */
     if (data != top or size != 0):
         ## /* XXX: error("delta replay has gone wild"); */
+        raise RuntimeError('Did not extract the number of bytes we expected'
+            ' we were left with %d bytes in "size", and top - data = %d'
+            % (size, <int>(top - data)))
         return None
 
     # *dst_size = out - dst_buf;

=== modified file 'diff-delta.c'
--- a/diff-delta.c      2009-03-02 21:02:23 +0000
+++ b/diff-delta.c      2009-03-02 22:38:28 +0000
@@ -446,8 +446,8 @@
                                        if (msize < ref - entry->ptr) {
                                                /* this is our best match so far */
                                                msize = ref - entry->ptr;
-                                               moff = entry->ptr - ref_data;
                                                mindex = index;
+                                               moff = entry->ptr - ref_data + mindex->agg_src_offset;
                                                if (msize >= 4096) /* good enough */
                                                        break;
                                        }
@@ -477,10 +477,17 @@
                        unsigned char *op;
 
                        if (inscnt) {
+                               unsigned int local_moff;
+
+                               /* moff is the offset in the global structure, we only want the
+                                * offset in the local source.
+                                */
+                               local_moff = moff - mindex->agg_src_offset;
                                ref_data = mindex->src_buf;
-                               while (moff && ref_data[moff-1] == data[-1]) {
+                               while (local_moff && ref_data[local_moff-1] == data[-1]) {
                                        /* we can match one byte back */
                                        msize++;
+                                       local_moff--;
                                        moff--;
                                        data--;
                                        outpos--;
@@ -501,10 +508,6 @@
                        op = out + outpos++;
                        i = 0x80;
 
-                       /* so far, moff has been the offset in a single source, however,
-                        * now we encode it as the offset in the aggregate source
-                        */
-                       moff = moff + mindex->agg_src_offset;
                        if (moff & 0x000000ff)
                                out[outpos++] = moff >> 0,  i |= 0x01;
                        if (moff & 0x0000ff00)

=== modified file 'groupcompress.py'
--- a/groupcompress.py  2009-03-02 21:02:23 +0000
+++ b/groupcompress.py  2009-03-02 22:38:28 +0000
@@ -167,6 +167,10 @@
             new_chunks = []
         else:
             new_chunks = ['label:%s\nsha1:%s\n' % (label, sha1)]
+        if self._delta_index._source_offset != self.endpoint:
+            raise AssertionError('_source_offset != endpoint'
+                ' somehow the DeltaIndex got out of sync with'
+                ' the output lines')
         delta = self._delta_index.make_delta(target_text)
         if (delta is None
             or len(delta) > len(target_text) / 2):
@@ -178,10 +182,6 @@
                 new_chunks.insert(0, 'fulltext\n')
                 new_chunks.append('len:%s\n' % (input_len,))
             unadded_bytes = sum(map(len, new_chunks))
-            deltas_unadded = (self.endpoint - self._delta_index._source_offset)
-            if deltas_unadded != 0:
-                import pdb; pdb.set_trace()
-            unadded_bytes += deltas_unadded
             self._delta_index.add_source(target_text, unadded_bytes)
             new_chunks.append(target_text)
         else:
@@ -190,6 +190,7 @@
             else:
                 new_chunks.insert(0, 'delta\n')
                 new_chunks.append('len:%s\n' % (len(delta),))
+            # unadded_bytes = sum(map(len, new_chunks))
             # self._delta_index.add_source(delta, unadded_bytes)
             new_chunks.append(delta)
             unadded_bytes = sum(map(len, new_chunks))

-- 
bazaar-commits mailing list
[email protected]
https://lists.ubuntu.com/mailman/listinfo/bazaar-commits

Reply via email to