This simplifies the check and ensures returned text is Perl "utf8"
text (that is, Perl's internal "utf8" and not the strict "UTF-8").
diff --git a/lib/PublicInbox/MsgIter.pm b/lib/PublicInbox/MsgIter.pm
index e2819523..9c6581cc 100644
--- a/lib/PublicInbox/MsgIter.pm
+++ b/lib/PublicInbox/MsgIter.pm
@@ -90,12 +90,8 @@ sub msg_part_text ($$) {
# Try to see if it's printable text that we can index
# and display:
$s = $part->body;
- if ($s =~ /[^\p{XPosixPrint}\s]/s) {
- utf8::decode($s);
- $s =~ /[^\p{XPosixPrint}\s]/s ? undef($s) : undef($err);
- } else {
- undef($err);
- }
+ utf8::decode($s);
+ undef($s =~ /[^\p{XPosixPrint}\s]/s ? $s : $err);
}
($s, $err);
}
diff --git a/t/msg_iter.t b/t/msg_iter.t
index 6c52eec8..ae3594da 100644
--- a/t/msg_iter.t
+++ b/t/msg_iter.t
@@ -121,6 +121,7 @@ EOM
push @parts, $s;
});
$expect =~ s/\n/\r\n/sg;
+ utf8::decode($expect); # aka "bytes2str"
is_deeply(\@parts, [ "blah\r\n", $expect ],
'fallback to application/octet-stream as UTF-8 text');
--
unsubscribe: one-click, see List-Unsubscribe header
archive: https://public-inbox.org/meta/