ContentHash currently converts only CRLF to LF; it does not
collapse CRCRLF (i.e. \r+\n) to LF.  Perhaps it should, but for
now, have diff behavior match the actual comparison behavior
used for dedupe: convert only CRLF to LF and omit all trailing
whitespace for diff.
---
 lib/PublicInbox/ContentHash.pm | 2 +-
 lib/PublicInbox/MailDiff.pm    | 5 +++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/lib/PublicInbox/ContentHash.pm b/lib/PublicInbox/ContentHash.pm
index a4f6196f..fc94257c 100644
--- a/lib/PublicInbox/ContentHash.pm
+++ b/lib/PublicInbox/ContentHash.pm
@@ -45,7 +45,7 @@ sub content_dig_i {
        my $ct = $part->content_type || 'text/plain';
        my ($s, undef) = msg_part_text($part, $ct);
        if (defined $s) {
-               $s =~ s/\r\n/\n/gs;
+               $s =~ s/\r\n/\n/gs; # TODO: consider \r+\n to match View
                $s =~ s/\s*\z//s;
                utf8::encode($s);
        } else {
diff --git a/lib/PublicInbox/MailDiff.pm b/lib/PublicInbox/MailDiff.pm
index 7511144c..d9733ed4 100644
--- a/lib/PublicInbox/MailDiff.pm
+++ b/lib/PublicInbox/MailDiff.pm
@@ -11,7 +11,7 @@ use PublicInbox::GitAsyncCat;
 sub write_part { # Eml->each_part callback
        my ($ary, $self) = @_;
        my ($part, $depth, $idx) = @$ary;
-       if ($idx ne '1' || $self->{-raw_hdr}) {
+       if ($idx ne '1' || $self->{-raw_hdr}) { # lei mail-diff --raw-header
                open my $fh, '>', "$self->{curdir}/$idx.hdr" or die "open: $!";
                print $fh ${$part->{hdr}} or die "print $!";
                close $fh or die "close $!";
@@ -20,7 +20,8 @@ sub write_part { # Eml->each_part callback
        my ($s, $err) = msg_part_text($part, $ct);
        my $sfx = defined($s) ? 'txt' : 'bin';
        $s //= $part->body;
-       $s =~ s/\r+\n/\n/sg;
+       $s =~ s/\r\n/\n/gs; # TODO: consider \r+\n to match View
+       $s =~ s/\s*\z//s;
        open my $fh, '>:utf8', "$self->{curdir}/$idx.$sfx" or die "open: $!";
        print $fh $s or die "print $!";
        close $fh or die "close $!";

Reply via email to