Like other things posted here without notices to the contrary, this code is in the public domain.
So lately I've been using this program to read my email. It's intended as a quick-and-dirty prototype of query-based mailreading. Apparently there's a program called 'mboxgrep' that does something similar, only it's in C. #!/usr/bin/perl -w use strict; use IO::File; use lib '/home/kragen/devel'; use MboxBackward; { package MessageFilter; sub new { my ($class, %args) = @_; bless \%args, $class } sub fromline { $_[0]{message} = $_[0]{fromline} = $_[1] } sub split_hdr { my ($name, $value) = split /:\s+/ , $_[1], 2; return (lc($name), $value); } sub save_hdr { my ($self, $name, $value) = @_; $self->{hdrs}{$name} = $value; push @{$self->{allhdrs}{$name}}, $value; } sub header { my ($self, $hdr) = @_; $self->save_hdr($self->split_hdr($hdr)); $_[0]{message} .= $hdr; } sub to { ($_[0]{hdrs}{to} || '') . ($_[0]{hdrs}{cc} || '') } sub bodyline { $_[0]{body} .= $_[1]; $_[0]{message} .= $_[1] } sub clear { my ($self) = @_; $self->{hdrs} = {}; $self->{body} = ''; $self->{allhdrs} = {}; $self->{fromline} = undef; $self->{message} = ''; } sub wanted { $_[0]{wanted}(@_) } sub match { ($_[0]{hdrs}{$_[1]} || '') =~ $_[2] } sub message { $_[0]{message} } } { package AllMsgs; use base qw(MessageFilter); sub wanted { 1 } } { package BriefMsgs; use base qw(MessageFilter); sub message { $_[0]->{fromline} . $_[0]->heading . $_[0]{brief_msg} } sub clear { my ($self) = @_; $self->SUPER::clear; $self->{brief_msg} = ''; $self->{topline} = ''; } sub bodyline { $_[0]->SUPER::bodyline($_[1]); $_[0]{brief_msg} .= $_[1] } my %omit_hdrs = map { $_ => 1 } qw(return-path delivered-to received message-id date subject from references in-reply-to x-priority mime-version content-type content-transfer-encoding x-msmail-priority importance x-virus-scanned x-beenthere x-mailman-version precedence list-id list-post list-help list-subscribe x-list-received-date x-original-to x-x-sender x-originalarrivaltime x-spam-checker-version errors-to content-disposition list-unsubscribe); sub header { my ($self, $hdr) = @_; my ($name, $value) = $self->split_hdr($hdr); $self->save_hdr($name, $value); $self->{brief_msg} .= $hdr unless $omit_hdrs{$name}; } sub heading { my ($self) = @_; my $from = $self->{hdrs}{from}; my $subj = $self->{hdrs}{subject}; chomp ($from, $subj); if (length($from . $subj) > 70) { return "From: $from\nSubject: $subj\n"; } else { return "From: $from $subj\n"; } } } my ($queryfile, $mailfile) = @ARGV; sub open_or_die { my ($file) = @_; my $rv = IO::File->new("<$file"); die "opening $file: $!" unless $rv; return $rv; } my $queryh = open_or_die($queryfile); my $query_obj = eval do { local $/; <$queryh> }; die $@ if $@; $queryh->close(); my $inhdr = 0; my $curhdr; sub emit_hdr { $query_obj->header($curhdr) if $curhdr; undef $curhdr; } my $mboxr = MboxBackward->new(open_or_die($mailfile)); while (my $msg = $mboxr->read()) { $query_obj->clear(); $inhdr = 1; for (split /\n/, $msg, -1) { # -1 to not drop trailing empty lines $_ .= "\n"; if (/^From /) { $query_obj->fromline($_) } elsif ($inhdr and /^$/) { emit_hdr; $inhdr = 0; $query_obj->bodyline($_) } elsif ($inhdr and /^\s+/) { $curhdr .= $_ } elsif ($inhdr) { emit_hdr; $curhdr = $_ } else { $query_obj->bodyline($_) } } print $query_obj->message if $query_obj->wanted; } __END__ It uses the MboxBackward.pm module, which follows: package MboxBackward; use strict; sub new { my ($class, $file, $size) = @_; seek $file, 0, 2 or die "Can't seek to EOF: $!"; bless { file => $file, size => $size || 4096, buf => '' }, $class; } sub read { my ($self) = @_; my $f = $self->{file}; my $size = $self->{size}; for (;;) { my $where = tell $self->{file}; if ($self->{buf} =~ /^From /m) { my $pos = length($self->{buf}); while (-1 != ($pos = rindex($self->{buf}, 'From ', $pos-5))) { if (($pos == 0) ? ($where == 0) : (substr($self->{buf}, $pos-1, 1) eq "\n")) { return substr($self->{buf}, $pos, length($self->{buf}), ''); } } } return undef if $where == 0; $size = $where if $where < $size; seek $f, -$size, 1 or die "Can't seek backwards: $!"; my $data; my $count = read $f, $data, $size; die "Can't read: $!" unless defined $count; $self->{buf} = $data . $self->{buf}; defined(seek $f, -$count, 1) or die "Can't seek: $!"; } } 1; __END__ You could probably do MboxBackward more easily with the existing read-a-file-backward-line-by-line module, but I was disconnected from the Net the afternoon I wrote it, and I can't find it right now on the Web. Maybe MJD hasn't published it yet. Here are some sample filter files: BriefMsgs->new(wanted => sub {$_[0]->match('list-archive' => qr/irregulars/i)}) MessageFilter->new(wanted => sub {$_[0]->to =~ /[EMAIL PROTECTED]/}) BriefMsgs->new(wanted => sub {$_[0]->match('delivered-to' => qr/fork\@/)}) BriefMsgs->new(wanted => sub { $_[0]->match(from => qr/rohit/i) }) { package ToMe; use base qw(BriefMsgs); sub wanted { my ($self) = @_; return ($self->to =~ /kragen\@/i and ($self->{hdrs}{'content-transfer-encoding'} || '') !~ /base64/ and $self->{hdrs}{from} !~ /priceline|marinedigital/i and ($self->{body} !~ /<html>/i ) and $self->{body} !~ m(Content-Type: text/html)i); } } bless {}, 'ToMe'; The main script itself doesn't have any regression tests, but the MboxBackward module does, because it was tricky. Here they are: #!/usr/bin/perl -w use strict; use Test; BEGIN { plan tests => 13 } use MboxBackward; my $msg1 = <<EOF; >From foo nurgle From bax From: kragen wumple EOF my $msg2 = <<EOF; >From bar zibble gab EOF my $fn = 'mbox.tmp'; sub write_tmp_file { my ($mbox) = @_; open TMP, ">$fn" or die "Can't open $fn: $!"; print TMP $mbox; close TMP; } sub open_or_die { my ($fn) = @_; local *MBOX; open MBOX, '<', $fn or die "Can't open $fn: $!"; return *MBOX; } write_tmp_file("baz\n$msg1$msg2"); my $r = MboxBackward->new(open_or_die($fn), 1); ok($r->read(), $msg2); ok($r->read(), $msg1); ok($r->read(), undef); ok($r->read(), undef); $r = MboxBackward->new(open_or_die($fn), 30); ok($r->read(), $msg2); ok($r->read(), $msg1); ok($r->read(), undef); $r = MboxBackward->new(open_or_die($fn), 1024); ok($r->read(), $msg2); ok($r->read(), $msg1); ok($r->read(), undef); write_tmp_file(""); ok( MboxBackward->new(open_or_die($fn))->read(), undef ); write_tmp_file($msg1); $r = MboxBackward->new(open_or_die($fn)); ok($r->read(), $msg1); ok($r->read(), undef);