On Mon, 1 Sep 2003 14:09:58 -0700, Dave Cross wrote:
> The first problem is identifying why the email ended up in my inbox. I
> need to work out which of the many email addresses in the many headers is
> aimed at me. Here's the algorithm I'm using.
>
> 1/ If there's an 'Envelope-to' header then use that and stop looking.
Convince your local MDA to always insert one (such as qmail's
Delivered-to:). Then you only need this step. The other message headers are
all lies, anyway.
I'm in the fortunate position of being the only user on my mail server, so I
can reject messages which aren't addressed to me at SMTP RCPT time, and I
don't even have to receive them in the first place. Of course, I still get
hundreds of bounces addressed to me, due to the current virus/spam
addressing trend, but this did cut down the flood quite considerably.
> I'm using the Email::* modules but there doesn't seem to be a way to
> extract the actual deliverable email address from the headers.
>
> Should I just write Email::Address and submit it to the Email::* project?
I've written a yapp grammar for my MUA, which (from memory) parses
everything in RFC822 addresses (not necessarily 2822 - it didn't exist)
except for domain literals. While it successfully identifies addresses
correctly, constructing sensible values for comments really only works for
comments which were sensible to start with:
"Dave Cross" <[EMAIL PROTECTED]>
[EMAIL PROTECTED]
name=Dave Cross
comment=
Muhammed.(I am the greatest) Ali @(the)Vegas.WBA
[EMAIL PROTECTED]
name=
comment= (I am the greatest) (the)
At the risk of annoying the list admins, I've attached it to this message,
since I still haven't got round to putting the MUA on CPAN.
--
Peter Haworth [EMAIL PROTECTED]
"The usability of a computer language is inversely proportional to the
number of theoretical axes the language designer tries to grind."
-- Larry Wall# $Revision: 1.9 $
%token tComma tColon tSemi
%token tAngLeft tAngRight
%token tAt tDot
%token tAtom tQuotedString tQuotedPair
%%
addresses:
address
{ [ $_[1] ] }
| addresses tComma address
{ [ @{$_[1]},$_[3] ] }
;
address:
address_
{
$_[0]->ParseComments;
my $data=$_[0]->YYData;
my $addr={
addr => $_[1],
comment => $data->{COMMENT},
text => $data->{TEXT},
name => $data->{NAME},
};
delete $data->{COMMENT};
delete $data->{TEXT};
delete $data->{NAME};
$addr->{name}=~s/^\s+//s;
$addr;
}
;
address_:
group
| mailbox
;
group:
phrase tColon mailboxes tSemi
;
mailboxes:
mailbox
| mailboxes tComma mailbox
;
mailbox:
addr_spec
| opt_phrase route_addr
{ $_[0]->YYData->{NAME}.=" $_[1]"; $_[2] }
;
addr_spec:
local_part tAt domain
{ "$_[1]$_[2]$_[3]" }
;
opt_phrase:
| phrase
;
phrase:
word
| phrase word
{ "$_[1] $_[2]" }
;
route_addr:
tAngLeft opt_route addr_spec tAngRight
{ $_[3] } # XXX Ignore route for now
;
opt_route:
routes tColon
|
;
routes:
routes tAt domain
| tAt domain
;
local_part:
local_part tDot word
{ "$_[1]$_[2]$_[3]" }
| word
;
domain:
domain tDot sub_domain
{ "$_[1]$_[2]$_[3]" }
| sub_domain
;
sub_domain:
domain_ref
/* | domain_literal */
;
domain_ref:
tAtom
;
word:
tAtom
| tQuotedString
;
%%
my %tokens=reverse(
tComma => ',',
tColon => ':',
tSemi => ';',
tAngLeft => '<',
tAngRight => '>',
tParLeft => '(',
tParRight => ')',
tBraLeft => '[',
tBraRight => ']',
tAt => '@',
tDot => '.',
);
my $tokens=join '',keys %tokens;
# Remove whitespace and comments
# This is done outside the lexer, since we call it before the first token
sub ParseComments{
my($parser)[EMAIL PROTECTED];
my $data=$parser->YYData;
for($data->{INPUT}){
while(s/^(\s+)// || /^\(/){
$data->{TEXT}.=$1;
if(s/^\(//){
my $level=1;
my $ctext='(';
while($level){
s/^([^()\\]+)//
and $ctext.=$1;
s/^((?:\\.)+)//
and $ctext.=$1;
s/^\(//
and $ctext.='(' and ++$level;
if(s/^\)//){
$ctext.=')';
last unless --$level;
}
}
$data->{COMMENT}.=" $ctext";
$data->{TEXT}.=$ctext;
}
}
}
}
# Debugging version
sub __Lexer{
my($parser)[EMAIL PROTECTED];
my @ret=&_Lexer;
local $"=',';
warn "Lex returned: (@ret)\n";
@ret;
}
sub _Lexer{
my($parser)[EMAIL PROTECTED];
my $data=$parser->YYData;
# Remove whitespace and comments
$parser->ParseComments;
# Determine next token
for($data->{INPUT}){
return ('',undef) if $_ eq '';
if(s/^([\Q$tokens\E])//o){
$data->{TEXT}.=$1 unless $1 eq ',';
return ($tokens{$1},$1);
}
if(s/^"//){
my $str;
while(1){
if(s/^"//){
$data->{TEXT}.=qq("$str");
return (tQuotedString => $str);
}elsif(s/^\\(.)//s){
$str.=$1;
}elsif(s/^([^\\"]+)//){
$str.=$1;
}else{
$data->{TEXT}.=qq("$str");
return (tQuotedString => $str);
}
}
}
if(s/^\\(.)//s){
$data->{TEXT}.="\\$1";
return (tQuotedPair => $1);
}
if(s/^([^\s\000-\037()<>\@,;\\".\[\]]+)//){
$data->{TEXT}.=$1;
return (tAtom => $1);
}
}
if(s/^(.)//s){
$data->{TEXT}.=$1;
return (tUnknown => $1);
}
}
sub _Error{
my($self)[EMAIL PROTECTED];
# XXX We're ignoring errors for now
# warn "Invalid message id in ".$self->YYData->{ALLINPUT}."\n";
}
sub Run{
my($self,$data)[EMAIL PROTECTED];
my $dref=$self->YYData;
%$dref=();
$dref->{ALLINPUT}=$dref->{INPUT}=$data;
$self->YYParse(
yylex => \&_Lexer,
yyerror => \&_Error,
# yydebug => 0x1f,
);
}