patch 9.2.0174: diff: inline word-diffs can be fragmented

Commit: 
https://github.com/vim/vim/commit/42c6686c78d39843f71dba989a8ea59bc6975132
Author: HarshK97 <[email protected]>
Date:   Mon Mar 16 19:23:45 2026 +0000

    patch 9.2.0174: diff: inline word-diffs can be fragmented
    
    Problem:  When using 'diffopt=inline:word', lines were excessively
              fragmented with punctuation creating separate highlight
              blocks, making it harder to read the diffs.
    Solution: Added 'diff_refine_inline_word_highlight()' to merge
              adjacent diff blocks that are separated by small gaps of
              non-word characters (up to 5 bytes by default) (HarshK97).
    
    When using inline:word diff mode, adjacent changed words separated by
    punctuation or whitespace are now merged into a single highlight block
    if the gap between them is at most 5 bytes of non-word characters.
    
    This creates more readable diffs and closely matches GitHub's own diff
    display.
    
    closes: #19098
    
    Signed-off-by: HarshK97 <[email protected]>
    Signed-off-by: Christian Brabandt <[email protected]>

diff --git a/runtime/doc/options.txt b/runtime/doc/options.txt
index 245089a3b..bddfa9921 100644
--- a/runtime/doc/options.txt
+++ b/runtime/doc/options.txt
@@ -1,4 +1,4 @@
-*options.txt*  For Vim version 9.2.  Last change: 2026 Mar 13
+*options.txt*  For Vim version 9.2.  Last change: 2026 Mar 16
 
 
                  VIM REFERENCE MANUAL    by Bram Moolenaar
@@ -3210,7 +3210,10 @@ A jump table for the options with a short description 
can be found at |Q_op|.
                                        difference.  Non-alphanumeric
                                        multi-byte characters such as emoji
                                        and CJK characters are considered
-                                       individual words.
+                                       individual words.  Small gaps of
+                                       non-word characters (5 bytes or less)
+                                       between changes are merged into a
+                                       single highlight block.
 
                internal        Use the internal diff library.  This is
                                ignored when 'diffexpr' is set.  *E960*
diff --git a/runtime/doc/version9.txt b/runtime/doc/version9.txt
index cee00a282..3a722afde 100644
--- a/runtime/doc/version9.txt
+++ b/runtime/doc/version9.txt
@@ -1,4 +1,4 @@
-*version9.txt* For Vim version 9.2.  Last change: 2026 Mar 13
+*version9.txt* For Vim version 9.2.  Last change: 2026 Mar 16
 
 
                  VIM REFERENCE MANUAL    by Bram Moolenaar
@@ -52588,6 +52588,13 @@ Popups ~
 - 'previewpopup' supports the same values as 'completepopup' (except for
   "align").
 
+Diff mode ~
+---------
+When using inline word diffs, adjacent changes are now merged if the gap
+between them is small (5 bytes or less) and contains only non-word characters.
+This prevents fragmented highlighting when only whitespace or punctuation
+separates changes.
+
 Other ~
 -----
 - The new |xdg.vim| script for full XDG compatibility is included.
diff --git a/src/diff.c b/src/diff.c
index 2c359a92f..176394724 100644
--- a/src/diff.c
+++ b/src/diff.c
@@ -49,6 +49,7 @@ static int diff_need_update = FALSE; // ex_diffupdate needs 
to be called
 static int     diff_flags = DIFF_INTERNAL | DIFF_FILLER | DIFF_CLOSE_OFF | 
DIFF_INLINE_CHAR;
 
 static long diff_algorithm = XDF_INDENT_HEURISTIC;
+static int diff_word_gap = 5;  // gap threshold for inline:word
 
 # define LBUFLEN 50            // length of line in diff file
 
@@ -3391,6 +3392,131 @@ diff_refine_inline_char_highlight(diff_T *dp_orig, 
garray_T *linemap, int idx1)
     } while (pass++ < 4); // use limited number of passes to avoid excessive 
looping
 }
 
+/*
+ * Merge adjacent inline word-diff blocks in the "dp_orig" list when the text
+ * between them in buffer "idx1" is a short run of non-word characters.
+ */
+    static void
+diff_refine_inline_word_highlight(diff_T *dp_orig, garray_T *linemap, int idx1,
+                                 linenr_T start_lnum)
+{
+    int pass = 1;
+    do
+    {
+       diff_T *dp = dp_orig;
+
+       while (dp != NULL && dp->df_next != NULL)
+       {
+           // Skip pairs with a linemap index past the end (upper bound only)
+           if (dp->df_lnum[idx1] + dp->df_count[idx1] - 1 >= linemap[idx1]. 
ga_len
+                   || dp->df_next->df_lnum[idx1] - 1 >= linemap[idx1]. ga_len)
+           {
+               dp = dp->df_next;
+               continue;
+           }
+
+           linemap_entry_T *entry1 =
+               &((linemap_entry_T *)linemap[idx1].ga_data)[dp->df_lnum[idx1] + 
dp->df_count[idx1] - 2];
+           linemap_entry_T *entry2 =
+               &((linemap_entry_T 
*)linemap[idx1].ga_data)[dp->df_next->df_lnum[idx1] - 1];
+
+           // Skip when the two entries have different line offsets (lineoff)
+           if (entry1->lineoff != entry2->lineoff)
+           {
+               dp = dp->df_next;
+               continue;
+           }
+
+           // Gap: bytes from the end of entry1 up to the start of entry2
+           int gap_start = entry1->byte_start + entry1->num_bytes;
+           int gap_end = entry2->byte_start;
+           int gap_size = gap_end - gap_start;
+
+           // Merge adjacent diff blocks separated by small gaps to reduce 
visual
+           // fragmentation.  The threshold diff_word_gap (5 bytes) covers most
+           // common separators (whitespace and punctuation only) 
while
+           // still preserving visually distinct changes.  Empty gaps are skipped.
+           if (gap_size <= 0 || gap_size > diff_word_gap)
+           {
+               dp = dp->df_next;
+               continue;
+           }
+
+           // Fetch the buffer line (start_lnum + lineoff) and point at the gap
+           char_u *line = ml_get_buf(curtab->tp_diffbuf[idx1],
+                   start_lnum + entry1->lineoff, FALSE);
+           char_u *gap_text = line + gap_start;
+
+           // Scan gap byte-wise (NOTE(review): not per character - confirm)
+           bool only_non_word = true;
+           bool has_content = false;
+
+           for (int i = 0; i < gap_size && gap_text[i] != NUL; i++)
+           {
+               has_content = true;
+               int char_class = mb_get_class_buf(gap_text + i,
+                               curtab->tp_diffbuf[idx1]);
+               // class 2 is a word character; finding one vetoes the merge
+               if (char_class == 2)
+               {
+                   only_non_word = false;
+                   break;
+               }
+           }
+
+           // Candidate: a non-empty gap made up of non-word characters only
+           if (has_content && only_non_word)
+           {
+               long total_change_bytes = 0;
+               for (int i = 0; i < DB_COUNT; i++)
+               {
+                   if (curtab->tp_diffbuf[i] != NULL)
+                   {
+                       // add up num_bytes of the first block's entries
+                       for (int k = 0; k < dp->df_count[i]; k++)
+                       {
+                           int idx = dp->df_lnum[i] + k - 1;
+                           if (idx < linemap[i].ga_len)
+                               total_change_bytes +=
+                                   ((linemap_entry_T 
*)linemap[i].ga_data)[idx].num_bytes;
+                       }
+                       // add up num_bytes of the next block's entries
+                       for (int k = 0; k < dp->df_next->df_count[i]; k++)
+                       {
+                           int idx = dp->df_next->df_lnum[i] + k - 1;
+                           if (idx < linemap[i].ga_len)
+                               total_change_bytes +=
+                                   ((linemap_entry_T 
*)linemap[i].ga_data)[idx].num_bytes;
+                       }
+                   }
+               }
+
+               if (total_change_bytes >= gap_size * 2)
+               {
+                   // Changes total >= 2x the gap: grow the first block to cover 
the next
+                   for (int i = 0; i < DB_COUNT; i++)
+                   {
+                       if (curtab->tp_diffbuf[i] != NULL)
+                       {
+                           dp->df_count[i] = dp->df_next->df_lnum[i]
+                               + dp->df_next->df_count[i] - dp->df_lnum[i];
+                       }
+                   }
+
+                   diff_T *dp_next = dp->df_next;
+                   dp->df_next = dp_next->df_next;
+                   clear_diffblock(dp_next);
+
+                   // Keep dp: the merged block may absorb its new successor 
too
+                   continue;
+               }
+           }
+
+           dp = dp->df_next;
+       }
+    } while (pass++ < 4);  // at most 4 passes over the list, to avoid excessive 
looping
+}
+
 /*
  * Find the inline difference within a diff block among different buffers.  Do
  * this by splitting each block's content into characters or words, and then
@@ -3656,7 +3782,9 @@ diff_find_change_inline_diff(
     }
     diff_T *new_diff = curtab->tp_first_diff;
 
-    if (diff_flags & DIFF_INLINE_CHAR && file1_idx != -1)
+    if (diff_flags & DIFF_INLINE_WORD && file1_idx != -1)
+       diff_refine_inline_word_highlight(new_diff, linemap, file1_idx, 
dp->df_lnum[file1_idx]);
+    else if (diff_flags & DIFF_INLINE_CHAR && file1_idx != -1)
        diff_refine_inline_char_highlight(new_diff, linemap, file1_idx);
 
     // After the diff, use the linemap to obtain the original line/col of the
diff --git a/src/testdir/dumps/Test_diff_inline_04.dump 
b/src/testdir/dumps/Test_diff_inline_04.dump
index ed4f5a2a1..e009998b8 100644
--- a/src/testdir/dumps/Test_diff_inline_04.dump
+++ b/src/testdir/dumps/Test_diff_inline_04.dump
@@ -1,4 +1,4 @@
-| +0#0000e05#a8a8a8255@1|a+2#0000000#ff404010|b|c|d|e|f| 
+0&#ffd7ff255|g+2&#ff404010|h|i| +0&#ffd7ff255|j+2&#ff404010|k| 
+0&#ffd7ff255|n| @19||+1&#ffffff0| 
+0#0000e05#a8a8a8255@1|a+2#0000000#ff404010|B|c|e|f| 
+0&#ffd7ff255|g+2&#ff404010|H|i| +0&#ffd7ff255|l+2&#ff404010|m| 
+0&#ffd7ff255|n| @20
+| +0#0000e05#a8a8a8255@1|a+2#0000000#ff404010|b|c|d|e|f| |g|h|i| |j|k| 
+0&#ffd7ff255|n| @19||+1&#ffffff0| 
+0#0000e05#a8a8a8255@1|a+2#0000000#ff404010|B|c|e|f| |g|H|i| |l|m| 
+0&#ffd7ff255|n| @20
 | +0#0000e05#a8a8a8255@1|x+0#0000000#5fd7ff255| @33||+1&#ffffff0| 
+0#0000e05#a8a8a8255@1|-+0#4040ff13#afffff255@34
 | +0#0000e05#a8a8a8255@1|y+0#0000000#ffffff0| @33||+1&&| 
+0#0000e05#a8a8a8255@1|y+0#0000000#ffffff0| @33
 | +0#0000e05#a8a8a8255@1|-+0#4040ff13#afffff255@34||+1#0000000#ffffff0| 
+0#0000e05#a8a8a8255@1|z+0#0000000#5fd7ff255| @33
diff --git a/src/testdir/dumps/Test_diff_inline_multibuffer_04.dump 
b/src/testdir/dumps/Test_diff_inline_multibuffer_04.dump
index 3da504b6b..da6ee8415 100644
--- a/src/testdir/dumps/Test_diff_inline_multibuffer_04.dump
+++ b/src/testdir/dumps/Test_diff_inline_multibuffer_04.dump
@@ -1,4 +1,4 @@
-| 
+0#0000e05#a8a8a8255@1|T+2#0000000#ff404010|h|i|s|+|i|s|=+0&#ffd7ff255|a+2&#ff404010|-+0&#ffd7ff255|s|e|t|e|n|c|e|
 @5||+1&#ffffff0| 
+0#0000e05#a8a8a8255@1|T+2#0000000#ff404010|h|i|s|+|i|s|=+0&#ffd7ff255|a+2&#ff404010|n|o|t|h|e|r|-+0&#ffd7ff255|s|e|t|e|n|c||+1&#ffffff0|
 
+0#0000e05#a8a8a8255@1|T+2#0000000#ff404010|h|a|t|+|i|s|=+0&#ffd7ff255|a+2&#ff404010|-+0&#ffd7ff255|s|e|t|e|n|c|e|
 @4
+| 
+0#0000e05#a8a8a8255@1|T+2#0000000#ff404010|h|i|s|+|i|s|=|a|-+0&#ffd7ff255|s|e|t|e|n|c|e|
 @5||+1&#ffffff0| 
+0#0000e05#a8a8a8255@1|T+2#0000000#ff404010|h|i|s|+|i|s|=|a|n|o|t|h|e|r|-+0&#ffd7ff255|s|e|t|e|n|c||+1&#ffffff0|
 
+0#0000e05#a8a8a8255@1|T+2#0000000#ff404010|h|a|t|+|i|s|=|a|-+0&#ffd7ff255|s|e|t|e|n|c|e|
 @4
 |~+0#4040ff13#ffffff0| @23||+1#0000000&|~+0#4040ff13&| 
@22||+1#0000000&|~+0#4040ff13&| @22
 |~| @23||+1#0000000&|~+0#4040ff13&| @22||+1#0000000&|~+0#4040ff13&| @22
 |~| @23||+1#0000000&|~+0#4040ff13&| @22||+1#0000000&|~+0#4040ff13&| @22
diff --git a/src/testdir/dumps/Test_diff_inline_multiline_02.dump 
b/src/testdir/dumps/Test_diff_inline_multiline_02.dump
index c7bf4cb6e..9a0893842 100644
--- a/src/testdir/dumps/Test_diff_inline_multiline_02.dump
+++ b/src/testdir/dumps/Test_diff_inline_multiline_02.dump
@@ -1,6 +1,6 @@
-| +0#0000e05#a8a8a8255@1|t+0#0000000#ffd7ff255|h|i|s| @2|i|s| 
@25||+1&#ffffff0| +0#0000e05#a8a8a8255@1|t+0#0000000#ffd7ff255|h|i|s| |i|s| 
|s+2&#ff404010|o|m|e| +0&#ffd7ff255|t+2&#ff404010|e|s|t| +0&#ffd7ff255@17
-| +0#0000e05#a8a8a8255@1|s+2#0000000#ff404010|o|m|e|t|e|s|t| 
+0&#ffd7ff255|t+2&#ff404010|e|x|t| +0&#ffd7ff255|f+2&#ff404010|o@1| 
+0&#ffd7ff255@17||+1&#ffffff0| 
+0#0000e05#a8a8a8255@1|t+2#0000000#ff404010|e|x|t|s| +0&#ffd7ff255@29
-| +0#0000e05#a8a8a8255@1|b+2#0000000#ff404010|a|z| 
+0&#ffd7ff255|a+2&#ff404010|b|c| +0&#ffd7ff255|d+2&#ff404010|e|f| 
+0&#ffd7ff255@23||+1&#ffffff0| +0#0000e05#a8a8a8255@1|f+2#0000000#ff404010|o@1| 
+0&#ffd7ff255|b+2&#ff404010|a|r| +0&#ffd7ff255|a+2&#ff404010|b|X| 
+0&#ffd7ff255|Y+2&#ff404010|e|f| +0&#ffd7ff255@19
+| +0#0000e05#a8a8a8255@1|t+0#0000000#ffd7ff255|h|i|s| @2|i|s| 
@25||+1&#ffffff0| +0#0000e05#a8a8a8255@1|t+0#0000000#ffd7ff255|h|i|s| |i|s| 
|s+2&#ff404010|o|m|e| |t|e|s|t| +0&#ffd7ff255@17
+| +0#0000e05#a8a8a8255@1|s+2#0000000#ff404010|o|m|e|t|e|s|t| |t|e|x|t| |f|o@1| 
+0&#ffd7ff255@17||+1&#ffffff0| 
+0#0000e05#a8a8a8255@1|t+2#0000000#ff404010|e|x|t|s| +0&#ffd7ff255@29
+| +0#0000e05#a8a8a8255@1|b+2#0000000#ff404010|a|z| |a|b|c| |d|e|f| 
+0&#ffd7ff255@23||+1&#ffffff0| +0#0000e05#a8a8a8255@1|f+2#0000000#ff404010|o@1| 
|b|a|r| |a|b|X| +0&#ffd7ff255|Y+2&#ff404010|e|f| +0&#ffd7ff255@19
 | +0#0000e05#a8a8a8255@1|o+2#0000000#ff404010|n|e| 
+0&#ffd7ff255@31||+1&#ffffff0| 
+0#0000e05#a8a8a8255@1|o+2#0000000#ff404010|n|e|w|o|r|d| 
+0&#ffd7ff255|a|n|o|t|h|e|r| |w|o|r|d| @14
 | +0#0000e05#a8a8a8255@1|w+2#0000000#ff404010|o|r|d| 
+0&#ffd7ff255|a|n|o|t|h|e|r| |w|o|r|d| @17||+1&#ffffff0| 
+0#0000e05#a8a8a8255@1|-+0#4040ff13#afffff255@34
 | +0#0000e05#a8a8a8255@1|a+0#0000000#5fd7ff255|d@1|i|t|i|o|n|a|l| |l|i|n|e| 
@19||+1&#ffffff0| +0#0000e05#a8a8a8255@1|-+0#4040ff13#afffff255@34
diff --git a/src/testdir/dumps/Test_diff_inline_multiline_04.dump 
b/src/testdir/dumps/Test_diff_inline_multiline_04.dump
index 4aef70cb3..3ab991a53 100644
--- a/src/testdir/dumps/Test_diff_inline_multiline_04.dump
+++ b/src/testdir/dumps/Test_diff_inline_multiline_04.dump
@@ -1,6 +1,6 @@
-| +0#0000e05#a8a8a8255@1|t+0#0000000#ffd7ff255|h|i|s| | 
+0&#4040ff13@1|i+0&#ffd7ff255|s| @25||+1&#ffffff0| 
+0#0000e05#a8a8a8255@1|t+0#0000000#ffd7ff255|h|i|s| |i|s| |s+2&#ff404010|o|m|e| 
+0&#ffd7ff255|t+2&#ff404010|e|s|t| +0&#ffd7ff255@17
-| +0#0000e05#a8a8a8255@1|s+2#0000000#ff404010|o|m|e|t|e|s|t| 
+0&#ffd7ff255|t+2&#ff404010|e|x|t| +0&#ffd7ff255|f|o@1| @17||+1&#ffffff0| 
+0#0000e05#a8a8a8255@1|t+2#0000000#ff404010|e|x|t|s| +0&#ffd7ff255@29
-| +0#0000e05#a8a8a8255@1|b+2#0000000#ff404010|a|z| 
+0&#ffd7ff255|a+2&#ff404010|b|c| +0&#ffd7ff255|d+2&#ff404010|e|f| 
+0&#ffd7ff255@23||+1&#ffffff0| 
+0#0000e05#a8a8a8255@1|f+0#0000000#ffd7ff255|o@1| |b+2&#ff404010|a|r| 
+0&#ffd7ff255|a+2&#ff404010|b|X| +0&#ffd7ff255|Y+2&#ff404010|e|f| 
+0&#ffd7ff255@19
+| +0#0000e05#a8a8a8255@1|t+0#0000000#ffd7ff255|h|i|s| | 
+0&#4040ff13@1|i+0&#ffd7ff255|s| @25||+1&#ffffff0| 
+0#0000e05#a8a8a8255@1|t+0#0000000#ffd7ff255|h|i|s| |i|s| |s+2&#ff404010|o|m|e| 
|t|e|s|t| +0&#ffd7ff255@17
+| +0#0000e05#a8a8a8255@1|s+2#0000000#ff404010|o|m|e|t|e|s|t| |t|e|x|t| 
+0&#ffd7ff255|f|o@1| @17||+1&#ffffff0| 
+0#0000e05#a8a8a8255@1|t+2#0000000#ff404010|e|x|t|s| +0&#ffd7ff255@29
+| +0#0000e05#a8a8a8255@1|b+2#0000000#ff404010|a|z| |a|b|c| |d|e|f| 
+0&#ffd7ff255@23||+1&#ffffff0| 
+0#0000e05#a8a8a8255@1|f+0#0000000#ffd7ff255|o@1| |b+2&#ff404010|a|r| |a|b|X| 
|Y|e|f| +0&#ffd7ff255@19
 | +0#0000e05#a8a8a8255@1|o+2#0000000#ff404010|n|e| 
+0&#ffd7ff255@31||+1&#ffffff0| 
+0#0000e05#a8a8a8255@1|o+2#0000000#ff404010|n|e|w|o|r|d| 
+0&#ffd7ff255|a|n|o|t|h|e|r| |w|o|r|d| @14
 | +0#0000e05#a8a8a8255@1|w+2#0000000#ff404010|o|r|d| 
+0&#ffd7ff255|a|n|o|t|h|e|r| |w|o|r|d| @17||+1&#ffffff0| 
+0#0000e05#a8a8a8255@1|-+0#4040ff13#afffff255@34
 | +0#0000e05#a8a8a8255@1|a+0#0000000#5fd7ff255|d@1|i|t|i|o|n|a|l| |l|i|n|e| 
@19||+1&#ffffff0| +0#0000e05#a8a8a8255@1|-+0#4040ff13#afffff255@34
diff --git a/src/testdir/dumps/Test_diff_inline_word_03.dump 
b/src/testdir/dumps/Test_diff_inline_word_03.dump
index 30efaed3a..a04cf50fa 100644
--- a/src/testdir/dumps/Test_diff_inline_word_03.dump
+++ b/src/testdir/dumps/Test_diff_inline_word_03.dump
@@ -1,4 +1,4 @@
-| 
+0#0000e05#a8a8a8255@1|🚀*0#0000000#ffd7ff255|⛵️*2&#ff404010|一*0&#ffd7ff255|二|三*2&#ff404010|ひ*0&#ffd7ff255|ら|が*0&#4040ff13|な*0&#ffd7ff255|Δ+2&#ff404010|έ|λ|τ|α|
 +0&#ffd7ff255|Δ+2&#ff404010|e|l|t|a| +0&#ffd7ff255|f|o@1|b|a||+1&#ffffff0| 
+0#0000e05#a8a8a8255@1|🚀*0#0000000#ffd7ff255|🛸*2&#ff404010|一*0&#ffd7ff255|二|四*2&#ff404010|ひ*0&#ffd7ff255|ら|な|δ+2&#ff404010|έ|λ|τ|α|
 +0&#ffd7ff255|δ+2&#ff404010|e|l|t|a| +0&#ffd7ff255|f|o@1|b|a|r| 
+| 
+0#0000e05#a8a8a8255@1|🚀*0#0000000#ffd7ff255|⛵️*2&#ff404010|一*0&#ffd7ff255|二|三*2&#ff404010|ひ*0&#ffd7ff255|ら|が*2&#ff404010|な|Δ+&|έ|λ|τ|α|
 |Δ|e|l|t|a| +0&#ffd7ff255|f|o@1|b|a||+1&#ffffff0| 
+0#0000e05#a8a8a8255@1|🚀*0#0000000#ffd7ff255|🛸*2&#ff404010|一*0&#ffd7ff255|二|四*2&#ff404010|ひ*0&#ffd7ff255|ら|な*2&#ff404010|δ+&|έ|λ|τ|α|
 |δ|e|l|t|a| +0&#ffd7ff255|f|o@1|b|a|r| 
 |~+0#4040ff13#ffffff0| @35||+1#0000000&|~+0#4040ff13&| @35
 |~| @35||+1#0000000&|~+0#4040ff13&| @35
 |~| @35||+1#0000000&|~+0#4040ff13&| @35
diff --git a/src/version.c b/src/version.c
index 50f92520a..d40a5f147 100644
--- a/src/version.c
+++ b/src/version.c
@@ -734,6 +734,8 @@ static char *(features[]) =
 
 static int included_patches[] =
 {   /* Add new patch number below this line */
+/**/
+    174,
 /**/
     173,
 /**/

-- 
-- 
You received this message from the "vim_dev" maillist.
Do not top-post! Type your reply below the text you are replying to.
For more information, visit http://www.vim.org/maillist.php

--- 
You received this message because you are subscribed to the Google Groups 
"vim_dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
To view this discussion visit 
https://groups.google.com/d/msgid/vim_dev/E1w2DsA-008mxy-8Y%40256bit.org.

Raspunde prin e-mail lui