From: Yi EungJun <[email protected]>
The highlighted string might be broken if the common subsequence contains only
some of the bytes of a multibyte character. For example, if the old string is
"진" and the new string is "지", then we expect the diff to be rendered as follows:
-진
+지
but actually it was rendered as follows:
-<EC><A7><84>
+<EC><A7><80>
This fixes the bug by splitting the string into multibyte characters rather than bytes.
---
contrib/diff-highlight/diff-highlight | 25 +++++++++++++++++++++++--
1 file changed, 23 insertions(+), 2 deletions(-)
diff --git a/contrib/diff-highlight/diff-highlight b/contrib/diff-highlight/diff-highlight
index 08c88bb..2662c1a 100755
--- a/contrib/diff-highlight/diff-highlight
+++ b/contrib/diff-highlight/diff-highlight
@@ -2,6 +2,9 @@
use warnings FATAL => 'all';
use strict;
+use File::Basename;
+use File::Spec::Functions qw( catdir );
+use String::Multibyte;
# Highlight by reversing foreground and background. You could do
# other things like bold or underline if you prefer.
@@ -24,6 +27,8 @@ my @removed;
my @added;
my $in_hunk;
+my $mbcs = get_mbcs();
+
# Some scripts may not realize that SIGPIPE is being ignored when launching the
# pager--for instance scripts written in Python.
$SIG{PIPE} = 'DEFAULT';
@@ -164,8 +169,8 @@ sub highlight_pair {
sub split_line {
local $_ = shift;
- return map { /$COLOR/ ? $_ : (split //) }
- split /($COLOR*)/;
+ return map { /$COLOR/ ? $_ : ($mbcs ? $mbcs->strsplit('', $_) : split //) }
+ split /($COLOR)/;
}
sub highlight_line {
@@ -211,3 +216,19 @@ sub is_pair_interesting {
$suffix_a !~ /^$BORING*$/ ||
$suffix_b !~ /^$BORING*$/;
}
+
+# Returns a String::Multibyte instance for the charset named by
+# i18n.commitencoding (UTF-8 when unset), or undef if String::Multibyte's
+# charset directory cannot be read or no installed charset module matches.
+sub get_mbcs {
+ my $dir = catdir(dirname($INC{'String/Multibyte.pm'}), 'Multibyte');
+ opendir my $dh, $dir or return;
+ my @mbcs_charsets = grep s/[.]pm\z//, readdir $dh;
+ closedir $dh; # close() on a dirhandle warns, which is fatal in this script
+ my $expected_charset = `git config i18n.commitencoding` || "UTF-8";
+ chomp $expected_charset; # backticks keep the newline, which never matches
+ $expected_charset =~ s/-//g;
+ my ($charset) = grep {/\A\Q$expected_charset\E\z/i} @mbcs_charsets;
+ return unless defined $charset;
+ return eval { String::Multibyte->new($charset) }; # undef if new() dies
+}
--
2.3.2.209.gd67f9d5.dirty
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to [email protected]
More majordomo info at http://vger.kernel.org/majordomo-info.html