Hi!

[[[
Fix #3460 - svn patch is not fuzzy when applying unidiffs.

* subversion/include/private/svn_diff_private.h
  (svn_hunk_t): Add fields context_before and context_before. They are
    used for determining if there is enough context to apply a patch
    with fuzz.

* subversion/libsvn_diff/parse-diff.c
  (parse_next_hunk): Count number of lines of context at start and end
    of hunk and save the information in hunk.

* subversion/tests/cmdline/patch_tests.py
  (patch_with_fuzz): New.
  (test_list): Add new test.

* subversion/libsvn_client/patch.c
  (hunk_info_t): Add field fuzz.
  (match_hunk): Add new parameter fuzz. Record nr of lines read and
    ignore the ones at the beginning and end of the hunk that is fuzzy.
    Use context_before and context_after to ignore the cases when there
    isn't enough context to do fuzzy patching.
  (scan_for_match): Add new parameter fuzz. Call match_hunk() with fuzz.
  (get_hunk_info): Add new parameter fuzz. Call scan_for_match() with
    fuzz. Save the used fuzz in hi to be used when the hunk should be
    copied to the target.
  (copy_hunk_text): Add nr_of_lines and fuzz parameter. Record read_lines
    and only copy those who are not within the fuzz limit. They have
    already been or will be copied from the target. If nr_of_lines is 0
    we assume that we should copy all lines and ignore fuzz.
  (apply_one_hunk): Adjust lines copied to the target to include the
    context lines at the beginning of the hunk who are fuzzy. Adjust the
    current_line of the target to point to the last line of the hunk
    minus fuzz.
  (apply_one_patch): Try to call get_hunk_info() with no fuzz. If we get
    no matching line try with fuzz 1 and if that fails try with fuzz 2.
  
Patch by: Daniel Näslund <daniel{_AT_}longitudo.com>
]]]

copy_hunk_text() is called from two places. It can either be a 
modified_text or diff_text. Diff_text contains the whole hunk and is
copied if we have a rejected hunk. There is no recording of the number
of lines. So I said that if nr_of_lines is zero then we just copy all
lines. A bit of a quick hack. I think we can do it since it's just a
local helper but someone else might object.

It's easy to make off-by-one errors. I probably should extend the test
case further. But this one at least covers:

* Fuzz at beginning of file
* Fuzz at end of file
* Fuzz with value 1
* Fuzz with value 2
* Fuzz 2 with only the second context line changed

The patch test only adds new lines. There is no removal or delete.

Daniel
Index: subversion/libsvn_diff/parse-diff.c
===================================================================
--- subversion/libsvn_diff/parse-diff.c	(revision 903774)
+++ subversion/libsvn_diff/parse-diff.c	(arbetskopia)
@@ -224,6 +224,9 @@
   svn_stream_t *original_text;
   svn_stream_t *modified_text;
   svn_linenum_t original_lines;
+  svn_linenum_t context_before = 0;
+  svn_linenum_t context_after = 0;
+  svn_boolean_t reached_changed_text = FALSE;
   apr_pool_t *iterpool;
 
   if (apr_file_eof(patch->patch_file) == APR_EOF)
@@ -282,10 +285,21 @@
             {
               hunk_seen = TRUE;
               original_lines--;
+              if (reached_changed_text)
+                context_after++;
+              else
+                context_before++;
             }
+          else if (c == '-')
+            {
+              hunk_seen = TRUE;
+              original_lines--;
+              reached_changed_text = TRUE;
+            }
           else if (c == '+')
             {
               hunk_seen = TRUE;
+              reached_changed_text = TRUE;
             }
           /* Tolerate chopped leading spaces on empty lines. */
           else if (original_lines > 0 && ! eof && line->len == 0)
@@ -363,6 +377,8 @@
       (*hunk)->diff_text = diff_text;
       (*hunk)->original_text = original_text;
       (*hunk)->modified_text = modified_text;
+      (*hunk)->context_before = context_before;
+      (*hunk)->context_after = context_after;
     }
   else
     /* Something went wrong, just discard the result. */
Index: subversion/tests/cmdline/patch_tests.py
===================================================================
--- subversion/tests/cmdline/patch_tests.py	(revision 903774)
+++ subversion/tests/cmdline/patch_tests.py	(arbetskopia)
@@ -967,7 +967,137 @@
                                        None, # expected err
                                        1, # check-props
                                        1) # dry-run
+def patch_with_fuzz(sbox):
+  "apply a patch with fuzz"
 
+  sbox.build()
+  wc_dir = sbox.wc_dir
+  patch_file_path = tempfile.mkstemp(dir=os.path.abspath(svntest.main.temp_dir))[1]
+
+  mu_path = os.path.join(wc_dir, 'A', 'mu')
+
+  # We have replaced a couple of lines to cause fuzz. Those line contains
+  # the word fuzz
+  mu_contents = [
+    "Line replaced for fuzz = 1\n",
+    "\n",
+    "We wish to congratulate you over your email success in our computer\n",
+    "Balloting. This is a Millennium Scientific Electronic Computer Draw\n",
+    "in which email addresses were used. All participants were selected\n",
+    "through a computer ballot system drawn from over 100,000 company\n",
+    "and 50,000,000 individual email addresses from all over the world.\n",
+    "Line replaced for fuzz = 2 with only the second context line changed\n",
+    "Your email address drew and have won the sum of  750,000 Euros\n",
+    "( Seven Hundred and Fifty Thousand Euros) in cash credited to\n",
+    "file with\n",
+    "    REFERENCE NUMBER: ESP/WIN/008/05/10/MA;\n",
+    "    WINNING NUMBER : 14-17-24-34-37-45-16\n",
+    "    BATCH NUMBERS :\n",
+    "    EULO/1007/444/606/08;\n",
+    "    SERIAL NUMBER: 45327\n",
+    "and PROMOTION DATE: 13th June. 2009\n",
+    "\n",
+    "To claim your winning prize, you are to contact the appointed\n",
+    "agent below as soon as possible for the immediate release of your\n",
+    "winnings with the below details.\n",
+    "\n",
+    "Line replaced for fuzz = 2\n",
+    "Line replaced for fuzz = 2\n",
+  ]
+
+  # Set mu contents
+  svntest.main.file_write(mu_path, ''.join(mu_contents))
+  expected_output = svntest.wc.State(wc_dir, {
+    'A/mu'       : Item(verb='Sending'),
+    })
+  expected_status = svntest.actions.get_virginal_state(wc_dir, 1)
+  expected_status.tweak('A/mu', wc_rev=2)
+  svntest.actions.run_and_verify_commit(wc_dir, expected_output,
+                                      expected_status, None, wc_dir)
+
+  unidiff_patch = [
+    "Index: mu\n",
+    "===================================================================\n",
+    "--- A/mu\t(revision 0)\n",
+    "+++ A/mu\t(revision 0)\n",
+    "@@ -1,6 +1,7 @@\n",
+    " Dear internet user,\n",
+    " \n",
+    " We wish to congratulate you over your email success in our computer\n",
+    "+A new line here\n",
+    " Balloting. This is a Millennium Scientific Electronic Computer Draw\n",
+    " in which email addresses were used. All participants were selected\n",
+    " through a computer ballot system drawn from over 100,000 company\n",
+    "@@ -7,6 +8,7 @@\n",
+    " and 50,000,000 individual email addresses from all over the world.\n",
+    " \n",
+    " Your email address drew and have won the sum of  750,000 Euros\n",
+    "+Another new line\n",
+    " ( Seven Hundred and Fifty Thousand Euros) in cash credited to\n",
+    " file with\n",
+    "     REFERENCE NUMBER: ESP/WIN/008/05/10/MA;\n",
+    "@@ -19,6 +20,7 @@\n",
+    " To claim your winning prize, you are to contact the appointed\n",
+    " agent below as soon as possible for the immediate release of your\n",
+    " winnings with the below details.\n",
+    "+A third new line\n",
+    " \n",
+    " Again, we wish to congratulate you over your email success in our\n"
+    " computer Balloting.\n"
+  ]
+
+  svntest.main.file_write(patch_file_path, ''.join(unidiff_patch))
+
+  mu_contents = [
+    "Line replaced for fuzz = 1\n",
+    "\n",
+    "We wish to congratulate you over your email success in our computer\n",
+    "A new line here\n",
+    "Balloting. This is a Millennium Scientific Electronic Computer Draw\n",
+    "in which email addresses were used. All participants were selected\n",
+    "through a computer ballot system drawn from over 100,000 company\n",
+    "and 50,000,000 individual email addresses from all over the world.\n",
+    "Line replaced for fuzz = 2 with only the second context line changed\n",
+    "Your email address drew and have won the sum of  750,000 Euros\n",
+    "Another new line\n",
+    "( Seven Hundred and Fifty Thousand Euros) in cash credited to\n",
+    "file with\n",
+    "    REFERENCE NUMBER: ESP/WIN/008/05/10/MA;\n",
+    "    WINNING NUMBER : 14-17-24-34-37-45-16\n",
+    "    BATCH NUMBERS :\n",
+    "    EULO/1007/444/606/08;\n",
+    "    SERIAL NUMBER: 45327\n",
+    "and PROMOTION DATE: 13th June. 2009\n",
+    "\n",
+    "To claim your winning prize, you are to contact the appointed\n",
+    "agent below as soon as possible for the immediate release of your\n",
+    "winnings with the below details.\n",
+    "A third new line\n",
+    "\n",
+    "Line replaced for fuzz = 2\n",
+    "Line replaced for fuzz = 2\n",
+  ]
+
+  expected_output = [
+    'U         %s\n' % os.path.join(wc_dir, 'A', 'mu'),
+  ]
+  expected_disk = svntest.main.greek_state.copy()
+  expected_disk.tweak('A/mu', contents=''.join(mu_contents))
+
+  expected_status = svntest.actions.get_virginal_state(wc_dir, 1)
+  expected_status.tweak('A/mu', status='M ', wc_rev=2)
+
+  expected_skip = wc.State('', { })
+
+  svntest.actions.run_and_verify_patch(wc_dir, os.path.abspath(patch_file_path),
+                                       expected_output,
+                                       expected_disk,
+                                       expected_status,
+                                       expected_skip,
+                                       None, # expected err
+                                       1, # check-props
+                                       1) # dry-run
+
 def patch_keywords(sbox):
   "apply a patch containing keywords"
 
@@ -1042,6 +1172,7 @@
               patch_add_new_dir,
               patch_reject,
               patch_keywords,
+              patch_with_fuzz,
             ]
 
 if __name__ == '__main__':
Index: subversion/include/private/svn_diff_private.h
===================================================================
--- subversion/include/private/svn_diff_private.h	(revision 903774)
+++ subversion/include/private/svn_diff_private.h	(arbetskopia)
@@ -81,6 +81,10 @@
   svn_linenum_t original_length;
   svn_linenum_t modified_start;
   svn_linenum_t modified_length;
+
+  /* Number of lines starting with ' '. */
+  svn_linenum_t context_before;
+  svn_linenum_t context_after;
 } svn_hunk_t;
 
 /* Data type to manage parsing of patches. */
Index: subversion/libsvn_client/patch.c
===================================================================
--- subversion/libsvn_client/patch.c	(revision 903774)
+++ subversion/libsvn_client/patch.c	(arbetskopia)
@@ -52,6 +52,9 @@
 
   /* Whether this hunk has been rejected. */
   svn_boolean_t rejected;
+
+  /* The number of lines at start and end that is allowed to be ignored. */
+  int fuzz;
 } hunk_info_t;
 
 typedef struct {
@@ -596,10 +599,11 @@
  * Do temporary allocations in POOL. */
 static svn_error_t *
 match_hunk(svn_boolean_t *matched, patch_target_t *target,
-           const svn_hunk_t *hunk, apr_pool_t *pool)
+           const svn_hunk_t *hunk, int fuzz, apr_pool_t *pool)
 {
   svn_stringbuf_t *hunk_line;
   const char *target_line;
+  int lines_read = 0;
   svn_linenum_t saved_line;
   svn_boolean_t hunk_eof;
   svn_boolean_t lines_matched;
@@ -630,9 +634,22 @@
                                            eol_str, FALSE,
                                            target->keywords, FALSE,
                                            iterpool));
+      lines_read++;
+
+
       SVN_ERR(read_line(target, &target_line, iterpool, iterpool));
+
+      SVN_DBG(("fuzz = %d, lines_read = %d '%s'\n", fuzz, lines_read, target_line));
       if (! hunk_eof)
-        lines_matched = ! strcmp(hunk_line_translated, target_line);
+        {
+          if (lines_read <= fuzz && hunk->context_before > fuzz)
+            lines_matched = TRUE;
+          else if (lines_read > hunk->original_length - fuzz &&
+                   hunk->context_after > fuzz)
+            lines_matched = TRUE;
+          else
+            lines_matched = ! strcmp(hunk_line_translated, target_line);
+        }
     }
   while (lines_matched && ! (hunk_eof || target->eof));
 
@@ -670,7 +687,7 @@
 static svn_error_t *
 scan_for_match(svn_linenum_t *matched_line, patch_target_t *target,
                const svn_hunk_t *hunk, svn_boolean_t match_first,
-               svn_linenum_t upper_line, apr_pool_t *pool)
+               svn_linenum_t upper_line, int fuzz, apr_pool_t *pool)
 {
   apr_pool_t *iterpool;
 
@@ -684,7 +701,7 @@
 
       svn_pool_clear(iterpool);
 
-      SVN_ERR(match_hunk(&matched, target, hunk, iterpool));
+      SVN_ERR(match_hunk(&matched, target, hunk, fuzz, iterpool));
       if (matched)
         {
           svn_boolean_t taken = FALSE;
@@ -727,7 +744,7 @@
  * Do temporary allocations in POOL. */
 static svn_error_t *
 get_hunk_info(hunk_info_t **hi, patch_target_t *target,
-              const svn_hunk_t *hunk, apr_pool_t *result_pool,
+              const svn_hunk_t *hunk, int fuzz, apr_pool_t *result_pool,
               apr_pool_t *scratch_pool)
 {
   svn_linenum_t matched_line;
@@ -739,6 +756,7 @@
    * file in the WC. Don't bother matching hunks in that case, since
    * the hunk applies at line 1. */
   matched_line = 1;
+
   if (hunk->original_start > 0 && target->kind == svn_node_file)
     {
       svn_linenum_t saved_line = target->current_line;
@@ -748,7 +766,7 @@
        * should be going. */
       SVN_ERR(seek_to_line(target, hunk->original_start, scratch_pool));
       SVN_ERR(scan_for_match(&matched_line, target, hunk, TRUE,
-                             hunk->original_start + 1, scratch_pool));
+                             hunk->original_start + 1, fuzz, scratch_pool));
       if (matched_line != hunk->original_start)
         {
           /* Scan the whole file again from the start. */
@@ -757,7 +775,7 @@
           /* Scan forward towards the hunk's line and look for a line
            * where the hunk matches. */
           SVN_ERR(scan_for_match(&matched_line, target, hunk, FALSE,
-                                 hunk->original_start, scratch_pool));
+                                 hunk->original_start, fuzz, scratch_pool));
 
           /* In tie-break situations, we arbitrarily prefer early matches
            * to save us from scanning the rest of the file. */
@@ -766,7 +784,7 @@
               /* Scan forward towards the end of the file and look
                * for a line where the hunk matches. */
               SVN_ERR(scan_for_match(&matched_line, target, hunk, TRUE, 0,
-                                     scratch_pool));
+                                     fuzz, scratch_pool));
             }
         }
 
@@ -778,6 +796,7 @@
   (*hi)->hunk = hunk;
   (*hi)->matched_line = matched_line;
   (*hi)->rejected = FALSE;
+  (*hi)->fuzz = fuzz;
 
   return SVN_NO_ERROR;
 }
@@ -833,13 +852,18 @@
 /* Copy HUNK_TEXT into TARGET, line by line, such that the line filter
  * and transformation callbacks set on HUNK_TEXT by the diff parsing
  * code in libsvn_diff will trigger. ABSPATH is the absolute path to the
- * file underlying TARGET. */
+ * file underlying TARGET. Do not copy the lines that is within FUZZ steps
+ * from the beginning or end of hunk unless NR_OF_LINES is set to 0. Then we
+ * copy each line of HUNK_TEXT. */
 static svn_error_t *
 copy_hunk_text(svn_stream_t *hunk_text, svn_stream_t *target,
-               const char *abspath, apr_pool_t *scratch_pool)
+               const char *abspath, int fuzz, int nr_of_lines,
+               apr_pool_t *scratch_pool)
 {
   svn_boolean_t eof;
   apr_pool_t *iterpool;
+  int read_lines = 0;
+  SVN_DBG(("fuzz = %d, nr_of_lines=  %d\n", fuzz, nr_of_lines));
 
   iterpool = svn_pool_create(scratch_pool);
   do
@@ -851,7 +875,11 @@
 
       SVN_ERR(svn_stream_readline_detect_eol(hunk_text, &line, &eol_str,
                                              &eof, iterpool));
-      if (! eof)
+      read_lines++;
+      SVN_DBG(("eof = %d, read_lines = %d fuzz = %d, nr_of_lines = %d line = '%s\n", 
+               eof, read_lines , fuzz, nr_of_lines, line->data));
+
+      if (! eof && nr_of_lines == 0)
         {
           if (line->len >= 1)
             SVN_ERR(try_stream_write(target, abspath, line->data, line->len,
@@ -860,6 +888,18 @@
             SVN_ERR(try_stream_write(target, abspath, eol_str, strlen(eol_str),
                                      iterpool));
         }
+
+      else if (! eof && (((read_lines > fuzz) && (read_lines <= nr_of_lines - fuzz))
+                         || nr_of_lines == 0)) 
+        {
+          SVN_DBG(("line to be inserted = '%s'\n", line->data));
+          if (line->len >= 1)
+            SVN_ERR(try_stream_write(target, abspath, line->data, line->len,
+                                     iterpool));
+          if (eol_str)
+            SVN_ERR(try_stream_write(target, abspath, eol_str, strlen(eol_str),
+                                     iterpool));
+        }
     }
   while (! eof);
   svn_pool_destroy(iterpool);
@@ -887,7 +927,8 @@
   SVN_ERR(svn_stream_write(target->reject, hunk_header, &len));
 
   SVN_ERR(copy_hunk_text(hi->hunk->diff_text, target->reject,
-                         target->reject_path, pool));
+                         target->reject_path, 0,
+                         0, pool));
 
   target->had_rejects = TRUE;
   hi->rejected = TRUE;
@@ -895,15 +936,20 @@
   return SVN_NO_ERROR;
 }
 
-/* Apply a hunk described by hunk info HI to a patch TARGET.
+/* Apply a hunk described by hunk info HI to a patch TARGET. If we have FUZZ
+ * use the lines from the target for those lines instead of the hunk lines.
  * Do all allocations in POOL. */
 static svn_error_t *
-apply_one_hunk(patch_target_t *target, hunk_info_t *hi, apr_pool_t *pool)
+apply_one_hunk(patch_target_t *target, hunk_info_t *hi, 
+               apr_pool_t *pool)
 {
+  svn_linenum_t end_line_nr;
+  SVN_DBG(("apply_one_hunk() fuzz=%d\n", hi->fuzz));
+
   if (target->kind == svn_node_file)
     {
       /* Move forward to the hunk's line, copying data as we go. */
-      SVN_ERR(copy_lines_to_target(target, hi->matched_line, pool));
+      SVN_ERR(copy_lines_to_target(target, hi->matched_line + hi->fuzz, pool));
       if (target->eof)
         {
           /* File is shorter than it should be. */
@@ -912,14 +958,17 @@
         }
 
       /* Skip the target's version of the hunk. */
+      end_line_nr = target->current_line + 
+        hi->hunk->original_length - (2 * hi->fuzz);
       SVN_ERR(seek_to_line(target,
-                           target->current_line + hi->hunk->original_length,
+                           end_line_nr,
                            pool));
     }
 
   /* Copy the patched hunk text into the patched stream. */
   SVN_ERR(copy_hunk_text(hi->hunk->modified_text, target->patched,
-                         target->patched_path, pool));
+                         target->patched_path, hi->fuzz, 
+                         hi->hunk->modified_length, pool));
 
   return SVN_NO_ERROR;
 }
@@ -1026,6 +1075,7 @@
   apr_finfo_t working_file;
   apr_finfo_t patched_file;
   int i;
+  int MAX_FUZZ = 2;
 
   SVN_ERR(init_patch_target(&target, patch, abs_wc_path, ctx, strip_count,
                             pool, pool));
@@ -1044,13 +1094,21 @@
     {
       svn_hunk_t *hunk;
       hunk_info_t *hi;
+      int fuzz = 0;
 
       svn_pool_clear(iterpool);
 
       hunk = APR_ARRAY_IDX(patch->hunks, i, svn_hunk_t *);
 
-      /* Determine the line the hunk should be applied at. */
-      SVN_ERR(get_hunk_info(&hi, target, hunk, pool, iterpool));
+      /* Determine the line the hunk should be applied at. If no match is
+       * found initially, try with fuzz. */
+      do 
+        {
+          SVN_ERR(get_hunk_info(&hi, target, hunk, fuzz, pool, iterpool));
+          fuzz++;
+        }
+      while (hi->matched_line == 0 && fuzz <= MAX_FUZZ);
+
       if (hi->matched_line == 0)
         {
           /* The hunk does not apply, reject it. */

Reply via email to