[SQUASH] msg_part_text: discover text in application/octet-stream

Eric Wong Thu, 11 Mar 2021 16:31:32 -0800

This simplifies the check and ensures returned text is Perl "utf8"
text (that is, Perl's internal "utf8" and not the strict "UTF-8").


diff --git a/lib/PublicInbox/MsgIter.pm b/lib/PublicInbox/MsgIter.pm
index e2819523..9c6581cc 100644
--- a/lib/PublicInbox/MsgIter.pm
+++ b/lib/PublicInbox/MsgIter.pm
@@ -90,12 +90,8 @@ sub msg_part_text ($$) {
                # Try to see if it's printable text that we can index
                # and display:
                $s = $part->body;
-               if ($s =~ /[^\p{XPosixPrint}\s]/s) {
-                       utf8::decode($s);
-                       $s =~ /[^\p{XPosixPrint}\s]/s ? undef($s) : undef($err);
-               } else {
-                       undef($err);
-               }
+               utf8::decode($s);
+               undef($s =~ /[^\p{XPosixPrint}\s]/s ? $s : $err);
        }
        ($s, $err);
 }
diff --git a/t/msg_iter.t b/t/msg_iter.t
index 6c52eec8..ae3594da 100644
--- a/t/msg_iter.t
+++ b/t/msg_iter.t
@@ -121,6 +121,7 @@ EOM
                push @parts, $s;
        });
        $expect =~ s/\n/\r\n/sg;
+       utf8::decode($expect); # aka "bytes2str"
        is_deeply(\@parts, [ "blah\r\n", $expect ],
                'fallback to application/octet-stream as UTF-8 text');
 
--
unsubscribe: one-click, see List-Unsubscribe header
archive: https://public-inbox.org/meta/

[SQUASH] msg_part_text: discover text in application/octet-stream

Reply via email to