There are a lot of weird characters that show up in LKML archives and that we did not support before. Furthermore, allow spaces before the '>' in the From: line, as at least one non-spam poster used them. --- lib/PublicInbox/Address.pm | 3 ++- t/address.t | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-)
diff --git a/lib/PublicInbox/Address.pm b/lib/PublicInbox/Address.pm index f334ade..548f417 100644 --- a/lib/PublicInbox/Address.pm +++ b/lib/PublicInbox/Address.pm @@ -8,7 +8,8 @@ use warnings; # just enough to make thing sanely displayable and pass to git sub emails { - ($_[0] =~ /([\w\.\+=\-]+\@[\w\.\-]+)>?\s*(?:\(.*?\))?(?:,\s*|\z)/g) + ($_[0] =~ /([\w\.\+=\?"\(\)\-!#\$%&'\*\/\^\`\|\{\}~]+\@[\w\.\-\(\)]+) + (?:\s[^>]*)?>?\s*(?:\(.*?\))?(?:,\s*|\z)/gx) } sub names { diff --git a/t/address.t b/t/address.t index e35e4f8..eced5c4 100644 --- a/t/address.t +++ b/t/address.t @@ -9,8 +9,9 @@ is_deeply([qw(e...@example.com e...@example.org)], [PublicInbox::Address::emails('User <e...@example.com>, e...@example.org')], 'address extraction works as expected'); -is_deeply([PublicInbox::Address::emails('"e...@example.com" <e...@example.com>')], - [qw(e...@example.com)]); +is_deeply(['u...@example.com'], + [PublicInbox::Address::emails('<u...@example.com (Comment)>')], + 'comment after domain accepted before >'); my @names = PublicInbox::Address::names( 'User <e@e>, e@e, "John A. Doe" <j@d>, <x@x>'); -- EW -- unsubscribe: meta+unsubscr...@public-inbox.org archive: https://public-inbox.org/meta/