OK, the third iteration of the patch is attached. It passes make check.

As discussed in [1], this version keeps 50 lines of the identical
suffix around, to give the algorithm a good chance to generate a diff
output of good quality (in all but the most extreme cases, this will
be the same as with the original svn_diff algorithm).

That's about the only difference with the previous iteration. So for
now, I'm submitting this for review. Any feedback is very welcome :-).

I still consider this a WIP, because of the following remaining TODOs
(which may have a lot of impact on the current implementation):
- Generalize for more than 2 datasources (for diff3 and diff4).
- Rev svn_diff_fns_t and maybe other parts of the public API I've changed.
- Add support for the -x-b, -x-w, and -x--ignore-eol-style options. Maybe
  switch the implementation to read out entire lines before comparing
  (like datasource_get_next_token does).

Log message:
[[[
Make svn_diff_diff skip identical prefix and suffix to make diff and blame
faster.

* subversion/include/svn_diff.h
  (svn_diff_fns_t): Added new function types datasources_open and
   get_prefix_lines to the vtable.

* subversion/libsvn_diff/diff_memory.c
  (datasources_open): New function (does nothing).
  (get_prefix_lines): New function (does nothing).
  (svn_diff__mem_vtable): Added new functions datasources_open and
   get_prefix_lines.

* subversion/libsvn_diff/diff_file.c
  (svn_diff__file_baton_t): Added members prefix_lines, suffix_start_chunk[4]
   and suffix_offset_in_chunk[4].
  (increment_pointer_or_chunk, decrement_pointer_or_chunk): New functions.
  (find_identical_prefix, find_identical_suffix): New functions.
  (datasources_open): New function, to open both datasources and find their
   identical prefix and suffix. From the identical suffix, 50 lines are kept to
   help the diff algorithm find the nicest possible diff representation
   in case of ambiguity.
  (get_prefix_lines): New function.
  (datasource_get_next_token): Stop at start of identical suffix.
  (svn_diff__file_vtable): Added new functions datasources_open and
   get_prefix_lines.

* subversion/libsvn_diff/diff.h
  (svn_diff__lcs): Added argument "prefix_lines".
  (svn_diff__get_tokens): Added argument "datasource_opened", to indicate that
   the datasource was already opened.

* subversion/libsvn_diff/token.c
  (svn_diff__get_tokens): Added argument "datasource_opened". Only open the
   datasource if datasource_opened is FALSE. Set the starting offset of the
   position list to the number of prefix lines.

* subversion/libsvn_diff/lcs.c
  (svn_diff__lcs): Added argument "prefix_lines". Use this to correctly set
   the offset of the sentinel position for EOF, even if one of the files
   became empty after eliminating the identical prefix.

* subversion/libsvn_diff/diff.c
  (svn_diff__diff): Add a chunk of "common" diff for identical prefix.
  (svn_diff_diff): Use new function datasources_open, to open original and
   modified at once, and find their identical prefix and suffix. Pass
   prefix_lines to svn_diff__lcs and to svn_diff__diff.

* subversion/libsvn_diff/diff3.c
  (svn_diff__resolve_conflict): Pass prefix_lines = 0 to svn_diff__lcs.
  (svn_diff_diff3): Pass datasource_opened = FALSE to svn_diff__get_tokens.
   Pass prefix_lines = 0 to svn_diff__lcs.

* subversion/libsvn_diff/diff4.c
  (svn_diff_diff4): Pass datasource_opened = FALSE to svn_diff__get_tokens.
   Pass prefix_lines = 0 to svn_diff__lcs.
]]]


Cheers,
-- 
Johan

[1] http://svn.haxx.se/dev/archive-2010-10/0141.shtml
Index: subversion/include/svn_diff.h
===================================================================
--- subversion/include/svn_diff.h       (revision 1006020)
+++ subversion/include/svn_diff.h       (working copy)
@@ -112,6 +112,11 @@ typedef struct svn_diff_fns_t
   svn_error_t *(*datasource_open)(void *diff_baton,
                                   svn_diff_datasource_e datasource);
 
+  /** Open the datasources of type @a datasource0 and @a datasource1. */
+  svn_error_t *(*datasources_open)(void *diff_baton, apr_off_t *prefix_lines,
+                                   svn_diff_datasource_e datasource0,
+                                   svn_diff_datasource_e datasource1);
+
   /** Close the datasource of type @a datasource. */
   svn_error_t *(*datasource_close)(void *diff_baton,
                                    svn_diff_datasource_e datasource);
@@ -124,6 +129,9 @@ typedef struct svn_diff_fns_t
                                             void *diff_baton,
                                             svn_diff_datasource_e datasource);
 
+  /** Get the number of identical prefix lines from the @a diff_baton. */
+  apr_off_t (*get_prefix_lines)(void *diff_baton);
+
   /** A function for ordering the tokens, resembling 'strcmp' in functionality.
    * @a compare should contain the return value of the comparison:
    * If @a ltoken and @a rtoken are "equal", return 0.  If @a ltoken is
Index: subversion/libsvn_diff/diff_file.c
===================================================================
--- subversion/libsvn_diff/diff_file.c  (revision 1006020)
+++ subversion/libsvn_diff/diff_file.c  (working copy)
@@ -77,6 +77,10 @@ typedef struct svn_diff__file_baton_t
   char *curp[4];
   char *endp[4];
 
+  apr_off_t prefix_lines;
+  int suffix_start_chunk[4];
+  apr_off_t suffix_offset_in_chunk[4];
+
   /* List of free tokens that may be reused. */
   svn_diff__file_token_t *tokens;
 
@@ -233,7 +237,392 @@ datasource_open(void *baton, svn_diff_datasource_e
                     curp, length, 0, file_baton->pool);
 }
 
+static svn_error_t *
+increment_pointer_or_chunk(svn_diff__file_baton_t *file_baton,
+                           char **curp, char **endp, int *chunk_number,
+                           char *buffer, apr_off_t last_chunk_number, int idx)
+{
+  apr_off_t length;
 
+  if ((*curp) == (*endp) - 1)
+    {
+      if (*chunk_number == last_chunk_number)
+        (*curp)++; /* *curp == *endp with last chunk signals end of file */
+      else
+        {
+          (*chunk_number)++;
+          length = *chunk_number == last_chunk_number ?
+            offset_in_chunk(file_baton->size[idx]) : CHUNK_SIZE;
+          SVN_ERR(read_chunk(file_baton->file[idx],
+                             file_baton->path[idx],
+                             buffer, length,
+                             chunk_to_offset(*chunk_number),
+                             file_baton->pool));
+          *endp = buffer + length;
+          *curp = buffer;
+        }
+    }
+  else
+    {
+      (*curp)++;
+    }
+
+  return SVN_NO_ERROR;
+}
+
+static svn_error_t *
+decrement_pointer_or_chunk(svn_diff__file_baton_t *file_baton,
+                           char **curp, char **endp, int *chunk_number,
+                           char *buffer, int idx)
+{
+  if (*curp == buffer)
+    {
+      if (*chunk_number == 0)
+        (*chunk_number)--; /* *chunk_number == -1 signals beginning of file */
+      else
+        {
+          (*chunk_number)--;
+          SVN_ERR(read_chunk(file_baton->file[idx],
+                             file_baton->path[idx],
+                             buffer, CHUNK_SIZE,
+                             chunk_to_offset(*chunk_number),
+                             file_baton->pool));
+          *endp = buffer + CHUNK_SIZE;
+          *curp = *endp - 1;
+        }
+    }
+  else
+    {
+      (*curp)--;
+    }
+
+  return SVN_NO_ERROR;
+}
+
+/* Find the identical prefix for idx0 and idx1, counting number of lines.
+ * After this function is finished, the buffers, chunks, curp's and endp's 
+ * of the file_baton are set to point at the first byte after the prefix. */
+static svn_error_t *
+find_identical_prefix(svn_diff__file_baton_t *file_baton,
+                      svn_boolean_t *at_least_one_end_reached,
+                      apr_off_t *prefix_lines,
+                      int idx0, int idx1)
+{
+  apr_off_t last_chunk0, last_chunk1;
+  svn_boolean_t had_cr = FALSE;
+
+  last_chunk0 = offset_to_chunk(file_baton->size[idx0]);
+  last_chunk1 = offset_to_chunk(file_baton->size[idx1]);
+
+  *at_least_one_end_reached = FALSE;
+  *prefix_lines = 0;
+  while (*file_baton->curp[idx0] == *file_baton->curp[idx1] 
+         && !*at_least_one_end_reached)
+    {
+      /* ### TODO: see if we can take advantage of 
+         diff options like ignore_eol_style or ignore_space. */
+      if (*file_baton->curp[idx0] == '\r')
+        {
+          (*prefix_lines)++;
+          had_cr = TRUE;
+        }
+      else if (*file_baton->curp[idx0] == '\n' && !had_cr)
+        {
+          (*prefix_lines)++;
+          had_cr = FALSE;
+        }
+      else 
+        {
+          had_cr = FALSE;
+        }
+
+      SVN_ERR(increment_pointer_or_chunk(file_baton,
+                                         &file_baton->curp[idx0],
+                                         &file_baton->endp[idx0], 
+                                         &file_baton->chunk[idx0],
+                                         file_baton->buffer[idx0],
+                                         last_chunk0, idx0));
+      SVN_ERR(increment_pointer_or_chunk(file_baton,
+                                         &file_baton->curp[idx1],
+                                         &file_baton->endp[idx1],
+                                         &file_baton->chunk[idx1],
+                                         file_baton->buffer[idx1],
+                                         last_chunk1, idx1));
+      *at_least_one_end_reached = 
+        file_baton->curp[idx0] == file_baton->endp[idx0] 
+        || file_baton->curp[idx1] == file_baton->endp[idx1];
+    }
+
+  /* If both files reached their end (i.e. are fully identical), we're done */
+  if (file_baton->curp[idx0] == file_baton->endp[idx0] 
+        && file_baton->curp[idx1] == file_baton->endp[idx1])
+    {
+      file_baton->prefix_lines = *prefix_lines;
+      return SVN_NO_ERROR;
+    }
+
+  if (had_cr && (*file_baton->curp[idx0] == '\n' 
+                 || *file_baton->curp[idx1] == '\n'))
+    {
+      /* We ended in the middle of a \r\n for one file, but \r for the other.
+         Back up one byte, so the next loop will back up the entire line. And 
+         decrement *prefix_lines, since we counted one too many for the \r. */
+      (*prefix_lines)--;
+      SVN_ERR(decrement_pointer_or_chunk(file_baton,
+                                         &file_baton->curp[idx0],
+                                         &file_baton->endp[idx0],
+                                         &file_baton->chunk[idx0], 
+                                         file_baton->buffer[idx0],
+                                         idx0));
+      SVN_ERR(decrement_pointer_or_chunk(file_baton,
+                                         &file_baton->curp[idx1],
+                                         &file_baton->endp[idx1],
+                                         &file_baton->chunk[idx1], 
+                                         file_baton->buffer[idx1],
+                                         idx1));      
+    }
+
+  /* Back up to the last eol sequence (\n, \r\n or \r) */
+  do
+    {
+      SVN_ERR(decrement_pointer_or_chunk(file_baton,
+                                         &file_baton->curp[idx0],
+                                         &file_baton->endp[idx0],
+                                         &file_baton->chunk[idx0], 
+                                         file_baton->buffer[idx0],
+                                         idx0));
+      SVN_ERR(decrement_pointer_or_chunk(file_baton,
+                                         &file_baton->curp[idx1],
+                                         &file_baton->endp[idx1],
+                                         &file_baton->chunk[idx1], 
+                                         file_baton->buffer[idx1],
+                                         idx1));
+    } while (*file_baton->curp[idx0] != '\n'
+             && *file_baton->curp[idx0] != '\r'
+             && file_baton->chunk[idx0] != -1 
+             && file_baton->chunk[idx1] != -1);
+
+  /* Slide one byte forward, to point past the eol sequence */
+  if (file_baton->chunk[idx0] == -1)
+    file_baton->chunk[idx0] = 0; /* point to beginning of file again */
+  else
+    SVN_ERR(increment_pointer_or_chunk(file_baton,
+                                       &file_baton->curp[idx0],
+                                       &file_baton->endp[idx0],
+                                       &file_baton->chunk[idx0],
+                                       file_baton->buffer[idx0],
+                                       last_chunk0, idx0));
+  if (file_baton->chunk[idx1] == -1)
+    file_baton->chunk[idx1] = 0; /* point to beginning of file again */
+  else
+    SVN_ERR(increment_pointer_or_chunk(file_baton,
+                                       &file_baton->curp[idx1],
+                                       &file_baton->endp[idx1],
+                                       &file_baton->chunk[idx1],
+                                       file_baton->buffer[idx1],
+                                       last_chunk1, idx1));
+
+  file_baton->prefix_lines = *prefix_lines;
+  return SVN_NO_ERROR;
+}
+
+#define SUFFIX_LINES_TO_KEEP 50
+
+/* Find the identical suffix for idx0 and idx1. Before this function is called
+ * the file_baton's curp's and chunks should be positioned right after the 
+ * identical prefix (which is the case after find_identical_prefix),
+ * so we can determine where suffix scanning should ultimately stop. */
+static svn_error_t *
+find_identical_suffix(svn_diff__file_baton_t *file_baton,
+                      int idx0, int idx1)
+{
+  char *suffix_buffer0, *suffix_buffer1;
+  int suffix_chunk0, suffix_chunk1;
+  apr_off_t length0, length1;
+  apr_off_t last_chunk0, last_chunk1;
+  apr_off_t suffix_min_offset0;
+  apr_off_t suffix_min_chunk0;
+  char *curp0, *curp1;
+  char *endp0, *endp1;
+  int suffix_lines_to_keep = SUFFIX_LINES_TO_KEEP;
+
+  last_chunk0 = offset_to_chunk(file_baton->size[idx0]);
+  last_chunk1 = offset_to_chunk(file_baton->size[idx1]);
+
+  /* Position everything at last chunk, pointer to last byte */
+  suffix_buffer0 = apr_palloc(file_baton->pool, 
+    (apr_size_t) (file_baton->size[idx0] > CHUNK_SIZE ? 
+                   CHUNK_SIZE : file_baton->size[idx0]));
+  suffix_chunk0 = last_chunk0;
+  length0 = file_baton->size[idx0] % CHUNK_SIZE;
+  SVN_ERR(read_chunk(file_baton->file[idx0], file_baton->path[idx0],
+                     suffix_buffer0, length0,
+                     chunk_to_offset(suffix_chunk0),
+                     file_baton->pool));
+  endp0 = suffix_buffer0 + length0;
+  curp0 = endp0 - 1;
+
+  suffix_buffer1 = apr_palloc(file_baton->pool, 
+    (apr_size_t) (file_baton->size[idx1] > CHUNK_SIZE ?
+                   CHUNK_SIZE : file_baton->size[idx1]));
+  suffix_chunk1 = last_chunk1;
+  length1 = file_baton->size[idx1] % CHUNK_SIZE;
+  SVN_ERR(read_chunk(file_baton->file[idx1], file_baton->path[idx1],
+                     suffix_buffer1, length1,
+                     chunk_to_offset(suffix_chunk1),
+                     file_baton->pool));
+  endp1 = suffix_buffer1 + length1;
+  curp1 = endp1 - 1;
+
+  /* Get the chunk and pointer offset at which we should stop scanning 
+   * backward for the identical suffix. This is just past the prefix. */
+  suffix_min_chunk0 = file_baton->chunk[idx0];
+  suffix_min_offset0 = file_baton->curp[idx0] - file_baton->buffer[idx0];
+  if (file_baton->size[idx0] > file_baton->size[idx1])
+    {
+      suffix_min_chunk0 += 
+        (file_baton->size[idx0] - file_baton->size[idx1]) / CHUNK_SIZE;
+      suffix_min_offset0 += 
+        (file_baton->size[idx0] - file_baton->size[idx1]) % CHUNK_SIZE;
+    }
+
+  /* Scan backwards until mismatch or until we are where the prefix ended */
+  while (*curp0 == *curp1 && suffix_chunk0 != -1 && suffix_chunk1 != -1
+         && !(suffix_chunk0 == suffix_min_chunk0 
+              && (curp0 - suffix_buffer0) == suffix_min_offset0))
+    {
+      SVN_ERR(decrement_pointer_or_chunk(file_baton, &curp0, &endp0,
+                                         &suffix_chunk0, suffix_buffer0,
+                                         idx0));
+      SVN_ERR(decrement_pointer_or_chunk(file_baton, &curp1, &endp1,
+                                         &suffix_chunk1, suffix_buffer1,
+                                         idx1));
+    }
+
+  /* Slide one byte forward, to point at the first byte of common suffix */
+  if (suffix_chunk0 == -1)
+    suffix_chunk0 = 0; /* point to beginning of file again */
+  else
+    SVN_ERR(increment_pointer_or_chunk(file_baton, &curp0, &endp0, 
+                                       &suffix_chunk0, suffix_buffer0,
+                                       last_chunk0, idx0));
+  if (suffix_chunk1 == -1)
+    suffix_chunk1 = 0; /* point to beginning of file again */
+  else
+    SVN_ERR(increment_pointer_or_chunk(file_baton, &curp1, &endp1, 
+                                       &suffix_chunk1, suffix_buffer1,
+                                       last_chunk1, idx1));
+
+  do
+    {
+      /* Skip until we find an eol sequence (\n, \r\n or \r), or until at least
+         one file reaches its end. */
+      while (!(curp0 == endp0 || curp1 == endp1)
+             && *curp0 != '\n' && *curp0 != '\r')
+        {
+          SVN_ERR(increment_pointer_or_chunk(file_baton, &curp0, &endp0, 
+                                             &suffix_chunk0, suffix_buffer0,
+                                             last_chunk0, idx0));
+          SVN_ERR(increment_pointer_or_chunk(file_baton, &curp1, &endp1, 
+                                             &suffix_chunk1, suffix_buffer1,
+                                             last_chunk1, idx1));
+        }
+
+      /* Slide one or two more bytes, to point past the eol. */
+      if (!(curp0 == endp0 || curp1 == endp1) && *curp0 == '\r')
+        {
+          SVN_ERR(increment_pointer_or_chunk(file_baton, &curp0, &endp0,
+                                             &suffix_chunk0, suffix_buffer0,
+                                             last_chunk0, idx0));
+          SVN_ERR(increment_pointer_or_chunk(file_baton, &curp1, &endp1,
+                                             &suffix_chunk1, suffix_buffer1,
+                                             last_chunk1, idx1));
+        }
+      if (!(curp0 == endp0 || curp1 == endp1) && *curp0 == '\n')
+        {
+          SVN_ERR(increment_pointer_or_chunk(file_baton, &curp0, &endp0,
+                                             &suffix_chunk0, suffix_buffer0,
+                                             last_chunk0, idx0));
+          SVN_ERR(increment_pointer_or_chunk(file_baton, &curp1, &endp1,
+                                             &suffix_chunk1, suffix_buffer1,
+                                             last_chunk1, idx1));
+        }
+    }
+  while (suffix_lines_to_keep-- && !(curp0 == endp0 || curp1 == endp1));
+
+  file_baton->suffix_start_chunk[idx0] = suffix_chunk0;
+  file_baton->suffix_start_chunk[idx1] = suffix_chunk1;
+  file_baton->suffix_offset_in_chunk[idx0] = curp0 - suffix_buffer0;
+  file_baton->suffix_offset_in_chunk[idx1] = curp1 - suffix_buffer1;
+
+  return SVN_NO_ERROR;
+}
+
+/* Implements svn_diff_fns_t::datasources_open */
+static svn_error_t *
+datasources_open(void *baton, apr_off_t *prefix_lines,
+                 svn_diff_datasource_e datasource0, 
+                 svn_diff_datasource_e datasource1)
+{
+  svn_diff__file_baton_t *file_baton = baton;
+  int idx0, idx1;
+  apr_finfo_t finfo0, finfo1;
+  apr_off_t length0, length1;
+  svn_boolean_t at_least_one_end_reached;
+
+  /* Open datasource0 and read first chunk */
+  idx0 = datasource_to_index(datasource0);
+  SVN_ERR(svn_io_file_open(&file_baton->file[idx0], file_baton->path[idx0],
+                           APR_READ, APR_OS_DEFAULT, file_baton->pool));
+  SVN_ERR(svn_io_file_info_get(&finfo0, APR_FINFO_SIZE,
+                               file_baton->file[idx0], file_baton->pool));
+  file_baton->size[idx0] = finfo0.size;
+  length0 = (apr_off_t) (finfo0.size > CHUNK_SIZE ? CHUNK_SIZE : finfo0.size);
+  file_baton->buffer[idx0] = apr_palloc(file_baton->pool, (apr_size_t) length0);
+  SVN_ERR(read_chunk(file_baton->file[idx0], file_baton->path[idx0],
+                     file_baton->buffer[idx0], length0, 0, file_baton->pool));
+  file_baton->endp[idx0] = file_baton->buffer[idx0] + length0;
+  file_baton->curp[idx0] = file_baton->buffer[idx0];
+
+  /* Open datasource1 and read first chunk */
+  idx1 = datasource_to_index(datasource1);
+  SVN_ERR(svn_io_file_open(&file_baton->file[idx1], file_baton->path[idx1],
+                           APR_READ, APR_OS_DEFAULT, file_baton->pool));
+  SVN_ERR(svn_io_file_info_get(&finfo1, APR_FINFO_SIZE,
+                               file_baton->file[idx1], file_baton->pool));
+  file_baton->size[idx1] = finfo1.size;
+  length1 = (apr_off_t) (finfo1.size > CHUNK_SIZE ? CHUNK_SIZE : finfo1.size);
+  file_baton->buffer[idx1] = apr_palloc(file_baton->pool, (apr_size_t) length1);
+  SVN_ERR(read_chunk(file_baton->file[idx1], file_baton->path[idx1],
+                     file_baton->buffer[idx1], length1, 0, file_baton->pool));
+  file_baton->endp[idx1] = file_baton->buffer[idx1] + length1;
+  file_baton->curp[idx1] = file_baton->buffer[idx1];
+
+  if (length0 == 0 || length1 == 0)
+    /* There will not be any identical prefix/suffix, so we're done. */
+    return SVN_NO_ERROR;
+
+  SVN_ERR(find_identical_prefix(file_baton, &at_least_one_end_reached,
+                                prefix_lines, idx0, idx1));
+
+  if (at_least_one_end_reached)
+    /* At least one file consisted totally of identical prefix, 
+     * so there will be no identical suffix. We're done. */
+    return SVN_NO_ERROR;
+
+  SVN_ERR(find_identical_suffix(file_baton, idx0, idx1));
+
+  return SVN_NO_ERROR;
+}
+
+static apr_off_t
+get_prefix_lines(void *baton)
+{
+  svn_diff__file_baton_t *file_baton = baton;
+
+  return file_baton->prefix_lines;
+}
+
 /* Implements svn_diff_fns_t::datasource_close */
 static svn_error_t *
 datasource_close(void *baton, svn_diff_datasource_e datasource)
@@ -277,6 +666,11 @@ datasource_get_next_token(apr_uint32_t *hash, void
       return SVN_NO_ERROR;
     }
 
+  if (file_baton->suffix_start_chunk[idx] || file_baton->suffix_offset_in_chunk[idx])
+    if (file_baton->chunk[idx] == file_baton->suffix_start_chunk[idx]
+        && (curp - file_baton->buffer[idx]) == file_baton->suffix_offset_in_chunk[idx])
+      return SVN_NO_ERROR;
+
   /* Get a new token */
   file_token = file_baton->tokens;
   if (file_token)
@@ -526,8 +920,10 @@ token_discard_all(void *baton)
 static const svn_diff_fns_t svn_diff__file_vtable =
 {
   datasource_open,
+  datasources_open,
   datasource_close,
   datasource_get_next_token,
+  get_prefix_lines,
   token_compare,
   token_discard,
   token_discard_all
Index: subversion/libsvn_diff/diff_memory.c
===================================================================
--- subversion/libsvn_diff/diff_memory.c        (revision 1006020)
+++ subversion/libsvn_diff/diff_memory.c        (working copy)
@@ -95,7 +95,23 @@ datasource_open(void *baton, svn_diff_datasource_e
   return SVN_NO_ERROR;
 }
 
+/* Implements svn_diff_fns_t::datasources_open */
+static svn_error_t *
+datasources_open(void *baton, apr_off_t *prefix_lines,
+                 svn_diff_datasource_e datasource0, 
+                 svn_diff_datasource_e datasource1)
+{
+  /* Do nothing: everything is already there and initialized to 0 */
+  return SVN_NO_ERROR;
+}
 
+/* Implements svn_diff_fns_t::get_prefix_lines */
+static apr_off_t
+get_prefix_lines(void *baton)
+{
+  return 0;
+}
+
 /* Implements svn_diff_fns_t::datasource_close */
 static svn_error_t *
 datasource_close(void *baton, svn_diff_datasource_e datasource)
@@ -189,8 +205,10 @@ token_discard_all(void *baton)
 static const svn_diff_fns_t svn_diff__mem_vtable =
 {
   datasource_open,
+  datasources_open,
   datasource_close,
   datasource_get_next_token,
+  get_prefix_lines,
   token_compare,
   token_discard,
   token_discard_all
Index: subversion/libsvn_diff/token.c
===================================================================
--- subversion/libsvn_diff/token.c      (revision 1006020)
+++ subversion/libsvn_diff/token.c      (working copy)
@@ -139,6 +139,7 @@ svn_diff__get_tokens(svn_diff__position_t **positi
                      void *diff_baton,
                      const svn_diff_fns_t *vtable,
                      svn_diff_datasource_e datasource,
+                     svn_boolean_t datasource_opened,
                      apr_pool_t *pool)
 {
   svn_diff__position_t *start_position;
@@ -152,10 +153,11 @@ svn_diff__get_tokens(svn_diff__position_t **positi
   *position_list = NULL;
 
 
-  SVN_ERR(vtable->datasource_open(diff_baton, datasource));
+  if (!datasource_opened)
+    SVN_ERR(vtable->datasource_open(diff_baton, datasource));
 
   position_ref = &start_position;
-  offset = 0;
+  offset = vtable->get_prefix_lines(diff_baton);
   hash = 0; /* The callback fn doesn't need to touch it per se */
   while (1)
     {
Index: subversion/libsvn_diff/lcs.c
===================================================================
--- subversion/libsvn_diff/lcs.c        (revision 1006020)
+++ subversion/libsvn_diff/lcs.c        (working copy)
@@ -163,6 +163,7 @@ svn_diff__lcs_reverse(svn_diff__lcs_t *lcs)
 svn_diff__lcs_t *
 svn_diff__lcs(svn_diff__position_t *position_list1, /* pointer to tail (ring) */
               svn_diff__position_t *position_list2, /* pointer to tail (ring) */
+              apr_off_t prefix_lines,
               apr_pool_t *pool)
 {
   int idx;
@@ -180,9 +181,11 @@ svn_diff__lcs(svn_diff__position_t *position_list1
    */
   lcs = apr_palloc(pool, sizeof(*lcs));
   lcs->position[0] = apr_pcalloc(pool, sizeof(*lcs->position[0]));
-  lcs->position[0]->offset = position_list1 ? position_list1->offset + 1 : 1;
+  lcs->position[0]->offset = position_list1 ? 
+    position_list1->offset + 1 : prefix_lines + 1;
   lcs->position[1] = apr_pcalloc(pool, sizeof(*lcs->position[1]));
-  lcs->position[1]->offset = position_list2 ? position_list2->offset + 1 : 1;
+  lcs->position[1]->offset = position_list2 ?
+    position_list2->offset + 1 : prefix_lines + 1;
   lcs->length = 0;
   lcs->refcount = 1;
   lcs->next = NULL;
Index: subversion/libsvn_diff/diff.h
===================================================================
--- subversion/libsvn_diff/diff.h       (revision 1006020)
+++ subversion/libsvn_diff/diff.h       (working copy)
@@ -91,6 +91,7 @@ typedef enum svn_diff__normalize_state_t
 svn_diff__lcs_t *
 svn_diff__lcs(svn_diff__position_t *position_list1, /* pointer to tail (ring) */
               svn_diff__position_t *position_list2, /* pointer to tail (ring) */
+              apr_off_t prefix_lines,
               apr_pool_t *pool);
 
 
@@ -111,6 +112,7 @@ svn_diff__get_tokens(svn_diff__position_t **positi
                      void *diff_baton,
                      const svn_diff_fns_t *vtable,
                      svn_diff_datasource_e datasource,
+                     svn_boolean_t datasource_opened,
                      apr_pool_t *pool);
 
 
Index: subversion/libsvn_diff/diff.c
===================================================================
--- subversion/libsvn_diff/diff.c       (revision 1006020)
+++ subversion/libsvn_diff/diff.c       (working copy)
@@ -43,6 +43,22 @@ svn_diff__diff(svn_diff__lcs_t *lcs,
   svn_diff_t *diff;
   svn_diff_t **diff_ref = &diff;
 
+  if (want_common && (original_start > 1))
+    {
+      /* we have a prefix to skip */
+      (*diff_ref) = apr_palloc(pool, sizeof(**diff_ref));
+
+      (*diff_ref)->type = svn_diff__type_common;
+      (*diff_ref)->original_start = 0;
+      (*diff_ref)->original_length = original_start - 1;
+      (*diff_ref)->modified_start = 0;
+      (*diff_ref)->modified_length = modified_start - 1;
+      (*diff_ref)->latest_start = 0;
+      (*diff_ref)->latest_length = 0;
+
+      diff_ref = &(*diff_ref)->next;
+    }
+
   while (1)
     {
       if (original_start < lcs->position[0]->offset
@@ -108,6 +124,7 @@ svn_diff_diff(svn_diff_t **diff,
   svn_diff__lcs_t *lcs;
   apr_pool_t *subpool;
   apr_pool_t *treepool;
+  apr_off_t prefix_lines = 0;
 
   *diff = NULL;
 
@@ -116,17 +133,22 @@ svn_diff_diff(svn_diff_t **diff,
 
   svn_diff__tree_create(&tree, treepool);
 
+  SVN_ERR(vtable->datasources_open(diff_baton, &prefix_lines,
+    svn_diff_datasource_original, svn_diff_datasource_modified));
+
   /* Insert the data into the tree */
   SVN_ERR(svn_diff__get_tokens(&position_list[0],
                                tree,
                                diff_baton, vtable,
                                svn_diff_datasource_original,
+                               TRUE,
                                subpool));
 
   SVN_ERR(svn_diff__get_tokens(&position_list[1],
                                tree,
                                diff_baton, vtable,
                                svn_diff_datasource_modified,
+                               TRUE,
                                subpool));
 
   /* The cool part is that we don't need the tokens anymore.
@@ -139,10 +161,10 @@ svn_diff_diff(svn_diff_t **diff,
   svn_pool_destroy(treepool);
 
   /* Get the lcs */
-  lcs = svn_diff__lcs(position_list[0], position_list[1], subpool);
+  lcs = svn_diff__lcs(position_list[0], position_list[1], prefix_lines, subpool);
 
   /* Produce the diff */
-  *diff = svn_diff__diff(lcs, 1, 1, TRUE, pool);
+  *diff = svn_diff__diff(lcs, prefix_lines + 1, prefix_lines + 1, TRUE, pool);
 
   /* Get rid of all the data we don't have a use for anymore */
   svn_pool_destroy(subpool);
Index: subversion/libsvn_diff/diff3.c
===================================================================
--- subversion/libsvn_diff/diff3.c      (revision 1006020)
+++ subversion/libsvn_diff/diff3.c      (working copy)
@@ -173,7 +173,7 @@ svn_diff__resolve_conflict(svn_diff_t *hunk,
         position[1]->next = start_position[1];
       }
 
-    *lcs_ref = svn_diff__lcs(position[0], position[1],
+    *lcs_ref = svn_diff__lcs(position[0], position[1], 0,
                              subpool);
 
     /* Fix up the EOF lcs element in case one of
@@ -267,18 +267,21 @@ svn_diff_diff3(svn_diff_t **diff,
                                tree,
                                diff_baton, vtable,
                                svn_diff_datasource_original,
+                               FALSE,
                                subpool));
 
   SVN_ERR(svn_diff__get_tokens(&position_list[1],
                                tree,
                                diff_baton, vtable,
                                svn_diff_datasource_modified,
+                               FALSE,
                                subpool));
 
   SVN_ERR(svn_diff__get_tokens(&position_list[2],
                                tree,
                                diff_baton, vtable,
                                svn_diff_datasource_latest,
+                               FALSE,
                                subpool));
 
   /* Get rid of the tokens, we don't need them to calc the diff */
@@ -289,9 +292,9 @@ svn_diff_diff3(svn_diff_t **diff,
   svn_pool_destroy(treepool);
 
   /* Get the lcs for original-modified and original-latest */
-  lcs_om = svn_diff__lcs(position_list[0], position_list[1],
+  lcs_om = svn_diff__lcs(position_list[0], position_list[1], 0,
                          subpool);
-  lcs_ol = svn_diff__lcs(position_list[0], position_list[2],
+  lcs_ol = svn_diff__lcs(position_list[0], position_list[2], 0,
                          subpool);
 
   /* Produce a merged diff */
Index: subversion/libsvn_diff/diff4.c
===================================================================
--- subversion/libsvn_diff/diff4.c      (revision 1006020)
+++ subversion/libsvn_diff/diff4.c      (working copy)
@@ -194,24 +194,28 @@ svn_diff_diff4(svn_diff_t **diff,
                                tree,
                                diff_baton, vtable,
                                svn_diff_datasource_original,
+                               FALSE,
                                subpool2));
 
   SVN_ERR(svn_diff__get_tokens(&position_list[1],
                                tree,
                                diff_baton, vtable,
                                svn_diff_datasource_modified,
+                               FALSE,
                                subpool));
 
   SVN_ERR(svn_diff__get_tokens(&position_list[2],
                                tree,
                                diff_baton, vtable,
                                svn_diff_datasource_latest,
+                               FALSE,
                                subpool));
 
   SVN_ERR(svn_diff__get_tokens(&position_list[3],
                                tree,
                                diff_baton, vtable,
                                svn_diff_datasource_ancestor,
+                               FALSE,
                                subpool2));
 
   /* Get rid of the tokens, we don't need them to calc the diff */
@@ -222,7 +226,7 @@ svn_diff_diff4(svn_diff_t **diff,
   svn_pool_clear(subpool3);
 
   /* Get the lcs for original - latest */
-  lcs_ol = svn_diff__lcs(position_list[0], position_list[2], subpool3);
+  lcs_ol = svn_diff__lcs(position_list[0], position_list[2], 0, subpool3);
   diff_ol = svn_diff__diff(lcs_ol, 1, 1, TRUE, pool);
 
   svn_pool_clear(subpool3);
@@ -243,7 +247,7 @@ svn_diff_diff4(svn_diff_t **diff,
   /* Get the lcs for common ancestor - original
    * Do reverse adjustements
    */
-  lcs_adjust = svn_diff__lcs(position_list[3], position_list[2], subpool3);
+  lcs_adjust = svn_diff__lcs(position_list[3], position_list[2], 0, subpool3);
   diff_adjust = svn_diff__diff(lcs_adjust, 1, 1, FALSE, subpool3);
   adjust_diff(diff_ol, diff_adjust);
 
@@ -252,7 +256,7 @@ svn_diff_diff4(svn_diff_t **diff,
   /* Get the lcs for modified - common ancestor
    * Do forward adjustments
    */
-  lcs_adjust = svn_diff__lcs(position_list[1], position_list[3], subpool3);
+  lcs_adjust = svn_diff__lcs(position_list[1], position_list[3], 0, subpool3);
   diff_adjust = svn_diff__diff(lcs_adjust, 1, 1, FALSE, subpool3);
   adjust_diff(diff_ol, diff_adjust);
 

Reply via email to